[official-gcc.git] / gcc / config / i386 / i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "hash-set.h"
26 #include "machmode.h"
27 #include "vec.h"
28 #include "double-int.h"
29 #include "input.h"
30 #include "alias.h"
31 #include "symtab.h"
32 #include "wide-int.h"
33 #include "inchash.h"
34 #include "tree.h"
35 #include "fold-const.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "calls.h"
39 #include "stor-layout.h"
40 #include "varasm.h"
41 #include "tm_p.h"
42 #include "regs.h"
43 #include "hard-reg-set.h"
44 #include "insn-config.h"
45 #include "conditions.h"
46 #include "output.h"
47 #include "insn-codes.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "except.h"
51 #include "function.h"
52 #include "recog.h"
53 #include "hashtab.h"
54 #include "statistics.h"
55 #include "real.h"
56 #include "fixed-value.h"
57 #include "expmed.h"
58 #include "dojump.h"
59 #include "explow.h"
60 #include "emit-rtl.h"
61 #include "stmt.h"
62 #include "expr.h"
63 #include "optabs.h"
64 #include "diagnostic-core.h"
65 #include "toplev.h"
66 #include "predict.h"
67 #include "dominance.h"
68 #include "cfg.h"
69 #include "cfgrtl.h"
70 #include "cfganal.h"
71 #include "lcm.h"
72 #include "cfgbuild.h"
73 #include "cfgcleanup.h"
74 #include "basic-block.h"
75 #include "ggc.h"
76 #include "target.h"
77 #include "target-def.h"
78 #include "common/common-target.h"
79 #include "langhooks.h"
80 #include "reload.h"
81 #include "hash-map.h"
82 #include "is-a.h"
83 #include "plugin-api.h"
84 #include "ipa-ref.h"
85 #include "cgraph.h"
86 #include "hash-table.h"
87 #include "tree-ssa-alias.h"
88 #include "internal-fn.h"
89 #include "gimple-fold.h"
90 #include "tree-eh.h"
91 #include "gimple-expr.h"
92 #include "gimple.h"
93 #include "gimplify.h"
94 #include "cfgloop.h"
95 #include "dwarf2.h"
96 #include "df.h"
97 #include "tm-constrs.h"
98 #include "params.h"
99 #include "cselib.h"
100 #include "debug.h"
101 #include "sched-int.h"
102 #include "sbitmap.h"
103 #include "fibheap.h"
104 #include "opts.h"
105 #include "diagnostic.h"
106 #include "dumpfile.h"
107 #include "tree-pass.h"
108 #include "context.h"
109 #include "pass_manager.h"
110 #include "target-globals.h"
111 #include "tree-vectorizer.h"
112 #include "shrink-wrap.h"
113 #include "builtins.h"
114 #include "rtl-iter.h"
115 #include "tree-iterator.h"
116 #include "tree-chkp.h"
117 #include "rtl-chkp.h"
119 static rtx legitimize_dllimport_symbol (rtx, bool);
120 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
121 static rtx legitimize_pe_coff_symbol (rtx, bool);
123 #ifndef CHECK_STACK_LIMIT
124 #define CHECK_STACK_LIMIT (-1)
125 #endif
127 /* Return index of given mode in mult and division cost tables. */
128 #define MODE_INDEX(mode) \
129 ((mode) == QImode ? 0 \
130 : (mode) == HImode ? 1 \
131 : (mode) == SImode ? 2 \
132 : (mode) == DImode ? 3 \
133 : 4)
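/* Illustrative note, assuming the field names used in i386.h: MODE_INDEX
   selects the per-mode entry of the multiply and divide cost arrays in the
   tables below, e.g. MODE_INDEX (SImode) == 2, so a SImode multiply is
   costed from the third ("SI") entry of the "starting multiply" array
   (roughly cost->mult_init[MODE_INDEX (mode)] in the rtx cost code).  */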
135 /* Processor costs (relative to an add) */
136 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
137 #define COSTS_N_BYTES(N) ((N) * 2)
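/* Worked example, for illustration: COSTS_N_INSNS (N) expands to (N) * 4,
   so a one-insn add costs 4 units in the speed tables, while here a 2-byte
   add costs COSTS_N_BYTES (2) == 4 units.  This keeps the -Os size costs
   below on the same scale as the COSTS_N_INSNS-based speed costs.  */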
139 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
141 static stringop_algs ix86_size_memcpy[2] = {
142 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
143 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
144 static stringop_algs ix86_size_memset[2] = {
145 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
146 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
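/* How these tables are read, for illustration (layout as described by
   stringop_algs in i386.h, assumed here): element [0] of each pair is used
   for 32-bit code and element [1] for 64-bit code; the leading algorithm
   handles blocks of unknown size, and each {max, alg, noalign} entry covers
   known sizes up to MAX bytes, with max == -1 closing the list.  So when
   optimizing for size, memcpy and memset always use the 1-byte rep prefix
   regardless of block size.  */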
148 const
149 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
150 COSTS_N_BYTES (2), /* cost of an add instruction */
151 COSTS_N_BYTES (3), /* cost of a lea instruction */
152 COSTS_N_BYTES (2), /* variable shift costs */
153 COSTS_N_BYTES (3), /* constant shift costs */
154 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
155 COSTS_N_BYTES (3), /* HI */
156 COSTS_N_BYTES (3), /* SI */
157 COSTS_N_BYTES (3), /* DI */
158 COSTS_N_BYTES (5)}, /* other */
159 0, /* cost of multiply per each bit set */
160 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
161 COSTS_N_BYTES (3), /* HI */
162 COSTS_N_BYTES (3), /* SI */
163 COSTS_N_BYTES (3), /* DI */
164 COSTS_N_BYTES (5)}, /* other */
165 COSTS_N_BYTES (3), /* cost of movsx */
166 COSTS_N_BYTES (3), /* cost of movzx */
167 0, /* "large" insn */
168 2, /* MOVE_RATIO */
169 2, /* cost for loading QImode using movzbl */
170 {2, 2, 2}, /* cost of loading integer registers
171 in QImode, HImode and SImode.
172 Relative to reg-reg move (2). */
173 {2, 2, 2}, /* cost of storing integer registers */
174 2, /* cost of reg,reg fld/fst */
175 {2, 2, 2}, /* cost of loading fp registers
176 in SFmode, DFmode and XFmode */
177 {2, 2, 2}, /* cost of storing fp registers
178 in SFmode, DFmode and XFmode */
179 3, /* cost of moving MMX register */
180 {3, 3}, /* cost of loading MMX registers
181 in SImode and DImode */
182 {3, 3}, /* cost of storing MMX registers
183 in SImode and DImode */
184 3, /* cost of moving SSE register */
185 {3, 3, 3}, /* cost of loading SSE registers
186 in SImode, DImode and TImode */
187 {3, 3, 3}, /* cost of storing SSE registers
188 in SImode, DImode and TImode */
189 3, /* MMX or SSE register to integer */
190 0, /* size of l1 cache */
191 0, /* size of l2 cache */
192 0, /* size of prefetch block */
193 0, /* number of parallel prefetches */
194 2, /* Branch cost */
195 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
196 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
197 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
198 COSTS_N_BYTES (2), /* cost of FABS instruction. */
199 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
200 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
201 ix86_size_memcpy,
202 ix86_size_memset,
203 1, /* scalar_stmt_cost. */
204 1, /* scalar load_cost. */
205 1, /* scalar_store_cost. */
206 1, /* vec_stmt_cost. */
207 1, /* vec_to_scalar_cost. */
208 1, /* scalar_to_vec_cost. */
209 1, /* vec_align_load_cost. */
210 1, /* vec_unalign_load_cost. */
211 1, /* vec_store_cost. */
212 1, /* cond_taken_branch_cost. */
213 1, /* cond_not_taken_branch_cost. */
214 };
216 /* Processor costs (relative to an add) */
217 static stringop_algs i386_memcpy[2] = {
218 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
219 DUMMY_STRINGOP_ALGS};
220 static stringop_algs i386_memset[2] = {
221 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
222 DUMMY_STRINGOP_ALGS};
224 static const
225 struct processor_costs i386_cost = { /* 386 specific costs */
226 COSTS_N_INSNS (1), /* cost of an add instruction */
227 COSTS_N_INSNS (1), /* cost of a lea instruction */
228 COSTS_N_INSNS (3), /* variable shift costs */
229 COSTS_N_INSNS (2), /* constant shift costs */
230 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
231 COSTS_N_INSNS (6), /* HI */
232 COSTS_N_INSNS (6), /* SI */
233 COSTS_N_INSNS (6), /* DI */
234 COSTS_N_INSNS (6)}, /* other */
235 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
236 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
237 COSTS_N_INSNS (23), /* HI */
238 COSTS_N_INSNS (23), /* SI */
239 COSTS_N_INSNS (23), /* DI */
240 COSTS_N_INSNS (23)}, /* other */
241 COSTS_N_INSNS (3), /* cost of movsx */
242 COSTS_N_INSNS (2), /* cost of movzx */
243 15, /* "large" insn */
244 3, /* MOVE_RATIO */
245 4, /* cost for loading QImode using movzbl */
246 {2, 4, 2}, /* cost of loading integer registers
247 in QImode, HImode and SImode.
248 Relative to reg-reg move (2). */
249 {2, 4, 2}, /* cost of storing integer registers */
250 2, /* cost of reg,reg fld/fst */
251 {8, 8, 8}, /* cost of loading fp registers
252 in SFmode, DFmode and XFmode */
253 {8, 8, 8}, /* cost of storing fp registers
254 in SFmode, DFmode and XFmode */
255 2, /* cost of moving MMX register */
256 {4, 8}, /* cost of loading MMX registers
257 in SImode and DImode */
258 {4, 8}, /* cost of storing MMX registers
259 in SImode and DImode */
260 2, /* cost of moving SSE register */
261 {4, 8, 16}, /* cost of loading SSE registers
262 in SImode, DImode and TImode */
263 {4, 8, 16}, /* cost of storing SSE registers
264 in SImode, DImode and TImode */
265 3, /* MMX or SSE register to integer */
266 0, /* size of l1 cache */
267 0, /* size of l2 cache */
268 0, /* size of prefetch block */
269 0, /* number of parallel prefetches */
270 1, /* Branch cost */
271 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
272 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
273 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
274 COSTS_N_INSNS (22), /* cost of FABS instruction. */
275 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
276 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
277 i386_memcpy,
278 i386_memset,
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
290 };
292 static stringop_algs i486_memcpy[2] = {
293 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
294 DUMMY_STRINGOP_ALGS};
295 static stringop_algs i486_memset[2] = {
296 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
297 DUMMY_STRINGOP_ALGS};
299 static const
300 struct processor_costs i486_cost = { /* 486 specific costs */
301 COSTS_N_INSNS (1), /* cost of an add instruction */
302 COSTS_N_INSNS (1), /* cost of a lea instruction */
303 COSTS_N_INSNS (3), /* variable shift costs */
304 COSTS_N_INSNS (2), /* constant shift costs */
305 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
306 COSTS_N_INSNS (12), /* HI */
307 COSTS_N_INSNS (12), /* SI */
308 COSTS_N_INSNS (12), /* DI */
309 COSTS_N_INSNS (12)}, /* other */
310 1, /* cost of multiply per each bit set */
311 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
312 COSTS_N_INSNS (40), /* HI */
313 COSTS_N_INSNS (40), /* SI */
314 COSTS_N_INSNS (40), /* DI */
315 COSTS_N_INSNS (40)}, /* other */
316 COSTS_N_INSNS (3), /* cost of movsx */
317 COSTS_N_INSNS (2), /* cost of movzx */
318 15, /* "large" insn */
319 3, /* MOVE_RATIO */
320 4, /* cost for loading QImode using movzbl */
321 {2, 4, 2}, /* cost of loading integer registers
322 in QImode, HImode and SImode.
323 Relative to reg-reg move (2). */
324 {2, 4, 2}, /* cost of storing integer registers */
325 2, /* cost of reg,reg fld/fst */
326 {8, 8, 8}, /* cost of loading fp registers
327 in SFmode, DFmode and XFmode */
328 {8, 8, 8}, /* cost of storing fp registers
329 in SFmode, DFmode and XFmode */
330 2, /* cost of moving MMX register */
331 {4, 8}, /* cost of loading MMX registers
332 in SImode and DImode */
333 {4, 8}, /* cost of storing MMX registers
334 in SImode and DImode */
335 2, /* cost of moving SSE register */
336 {4, 8, 16}, /* cost of loading SSE registers
337 in SImode, DImode and TImode */
338 {4, 8, 16}, /* cost of storing SSE registers
339 in SImode, DImode and TImode */
340 3, /* MMX or SSE register to integer */
341 4, /* size of l1 cache. 486 has 8kB cache
342 shared for code and data, so 4kB is
343 not really precise. */
344 4, /* size of l2 cache */
345 0, /* size of prefetch block */
346 0, /* number of parallel prefetches */
347 1, /* Branch cost */
348 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
349 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
350 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
351 COSTS_N_INSNS (3), /* cost of FABS instruction. */
352 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
353 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
354 i486_memcpy,
355 i486_memset,
356 1, /* scalar_stmt_cost. */
357 1, /* scalar load_cost. */
358 1, /* scalar_store_cost. */
359 1, /* vec_stmt_cost. */
360 1, /* vec_to_scalar_cost. */
361 1, /* scalar_to_vec_cost. */
362 1, /* vec_align_load_cost. */
363 2, /* vec_unalign_load_cost. */
364 1, /* vec_store_cost. */
365 3, /* cond_taken_branch_cost. */
366 1, /* cond_not_taken_branch_cost. */
367 };
369 static stringop_algs pentium_memcpy[2] = {
370 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
371 DUMMY_STRINGOP_ALGS};
372 static stringop_algs pentium_memset[2] = {
373 {libcall, {{-1, rep_prefix_4_byte, false}}},
374 DUMMY_STRINGOP_ALGS};
376 static const
377 struct processor_costs pentium_cost = {
378 COSTS_N_INSNS (1), /* cost of an add instruction */
379 COSTS_N_INSNS (1), /* cost of a lea instruction */
380 COSTS_N_INSNS (4), /* variable shift costs */
381 COSTS_N_INSNS (1), /* constant shift costs */
382 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
383 COSTS_N_INSNS (11), /* HI */
384 COSTS_N_INSNS (11), /* SI */
385 COSTS_N_INSNS (11), /* DI */
386 COSTS_N_INSNS (11)}, /* other */
387 0, /* cost of multiply per each bit set */
388 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
389 COSTS_N_INSNS (25), /* HI */
390 COSTS_N_INSNS (25), /* SI */
391 COSTS_N_INSNS (25), /* DI */
392 COSTS_N_INSNS (25)}, /* other */
393 COSTS_N_INSNS (3), /* cost of movsx */
394 COSTS_N_INSNS (2), /* cost of movzx */
395 8, /* "large" insn */
396 6, /* MOVE_RATIO */
397 6, /* cost for loading QImode using movzbl */
398 {2, 4, 2}, /* cost of loading integer registers
399 in QImode, HImode and SImode.
400 Relative to reg-reg move (2). */
401 {2, 4, 2}, /* cost of storing integer registers */
402 2, /* cost of reg,reg fld/fst */
403 {2, 2, 6}, /* cost of loading fp registers
404 in SFmode, DFmode and XFmode */
405 {4, 4, 6}, /* cost of storing fp registers
406 in SFmode, DFmode and XFmode */
407 8, /* cost of moving MMX register */
408 {8, 8}, /* cost of loading MMX registers
409 in SImode and DImode */
410 {8, 8}, /* cost of storing MMX registers
411 in SImode and DImode */
412 2, /* cost of moving SSE register */
413 {4, 8, 16}, /* cost of loading SSE registers
414 in SImode, DImode and TImode */
415 {4, 8, 16}, /* cost of storing SSE registers
416 in SImode, DImode and TImode */
417 3, /* MMX or SSE register to integer */
418 8, /* size of l1 cache. */
419 8, /* size of l2 cache */
420 0, /* size of prefetch block */
421 0, /* number of parallel prefetches */
422 2, /* Branch cost */
423 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
424 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
425 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
426 COSTS_N_INSNS (1), /* cost of FABS instruction. */
427 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
428 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
429 pentium_memcpy,
430 pentium_memset,
431 1, /* scalar_stmt_cost. */
432 1, /* scalar load_cost. */
433 1, /* scalar_store_cost. */
434 1, /* vec_stmt_cost. */
435 1, /* vec_to_scalar_cost. */
436 1, /* scalar_to_vec_cost. */
437 1, /* vec_align_load_cost. */
438 2, /* vec_unalign_load_cost. */
439 1, /* vec_store_cost. */
440 3, /* cond_taken_branch_cost. */
441 1, /* cond_not_taken_branch_cost. */
442 };
444 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
445 (we ensure the alignment). For small blocks inline loop is still a
446 noticeable win, for bigger blocks either rep movsl or rep movsb is
447 the way to go. Rep movsb apparently has a more expensive startup time in the CPU,
448 but after 4K the difference is down in the noise. */
449 static stringop_algs pentiumpro_memcpy[2] = {
450 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
451 {8192, rep_prefix_4_byte, false},
452 {-1, rep_prefix_1_byte, false}}},
453 DUMMY_STRINGOP_ALGS};
454 static stringop_algs pentiumpro_memset[2] = {
455 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
456 {8192, rep_prefix_4_byte, false},
457 {-1, libcall, false}}},
458 DUMMY_STRINGOP_ALGS};
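/* Reading the tables above, for illustration: in 32-bit code a memcpy of at
   most 128 bytes uses an inline loop, at most 1024 bytes an unrolled loop,
   at most 8192 bytes rep movsl, and anything larger rep movsb; blocks of
   unknown size fall back to the leading rep_prefix_4_byte entry.  memset is
   tuned the same way except that very large blocks go to a libcall.  */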
459 static const
460 struct processor_costs pentiumpro_cost = {
461 COSTS_N_INSNS (1), /* cost of an add instruction */
462 COSTS_N_INSNS (1), /* cost of a lea instruction */
463 COSTS_N_INSNS (1), /* variable shift costs */
464 COSTS_N_INSNS (1), /* constant shift costs */
465 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
466 COSTS_N_INSNS (4), /* HI */
467 COSTS_N_INSNS (4), /* SI */
468 COSTS_N_INSNS (4), /* DI */
469 COSTS_N_INSNS (4)}, /* other */
470 0, /* cost of multiply per each bit set */
471 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
472 COSTS_N_INSNS (17), /* HI */
473 COSTS_N_INSNS (17), /* SI */
474 COSTS_N_INSNS (17), /* DI */
475 COSTS_N_INSNS (17)}, /* other */
476 COSTS_N_INSNS (1), /* cost of movsx */
477 COSTS_N_INSNS (1), /* cost of movzx */
478 8, /* "large" insn */
479 6, /* MOVE_RATIO */
480 2, /* cost for loading QImode using movzbl */
481 {4, 4, 4}, /* cost of loading integer registers
482 in QImode, HImode and SImode.
483 Relative to reg-reg move (2). */
484 {2, 2, 2}, /* cost of storing integer registers */
485 2, /* cost of reg,reg fld/fst */
486 {2, 2, 6}, /* cost of loading fp registers
487 in SFmode, DFmode and XFmode */
488 {4, 4, 6}, /* cost of storing fp registers
489 in SFmode, DFmode and XFmode */
490 2, /* cost of moving MMX register */
491 {2, 2}, /* cost of loading MMX registers
492 in SImode and DImode */
493 {2, 2}, /* cost of storing MMX registers
494 in SImode and DImode */
495 2, /* cost of moving SSE register */
496 {2, 2, 8}, /* cost of loading SSE registers
497 in SImode, DImode and TImode */
498 {2, 2, 8}, /* cost of storing SSE registers
499 in SImode, DImode and TImode */
500 3, /* MMX or SSE register to integer */
501 8, /* size of l1 cache. */
502 256, /* size of l2 cache */
503 32, /* size of prefetch block */
504 6, /* number of parallel prefetches */
505 2, /* Branch cost */
506 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
507 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
508 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
509 COSTS_N_INSNS (2), /* cost of FABS instruction. */
510 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
511 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
512 pentiumpro_memcpy,
513 pentiumpro_memset,
514 1, /* scalar_stmt_cost. */
515 1, /* scalar load_cost. */
516 1, /* scalar_store_cost. */
517 1, /* vec_stmt_cost. */
518 1, /* vec_to_scalar_cost. */
519 1, /* scalar_to_vec_cost. */
520 1, /* vec_align_load_cost. */
521 2, /* vec_unalign_load_cost. */
522 1, /* vec_store_cost. */
523 3, /* cond_taken_branch_cost. */
524 1, /* cond_not_taken_branch_cost. */
525 };
527 static stringop_algs geode_memcpy[2] = {
528 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
529 DUMMY_STRINGOP_ALGS};
530 static stringop_algs geode_memset[2] = {
531 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
532 DUMMY_STRINGOP_ALGS};
533 static const
534 struct processor_costs geode_cost = {
535 COSTS_N_INSNS (1), /* cost of an add instruction */
536 COSTS_N_INSNS (1), /* cost of a lea instruction */
537 COSTS_N_INSNS (2), /* variable shift costs */
538 COSTS_N_INSNS (1), /* constant shift costs */
539 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
540 COSTS_N_INSNS (4), /* HI */
541 COSTS_N_INSNS (7), /* SI */
542 COSTS_N_INSNS (7), /* DI */
543 COSTS_N_INSNS (7)}, /* other */
544 0, /* cost of multiply per each bit set */
545 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
546 COSTS_N_INSNS (23), /* HI */
547 COSTS_N_INSNS (39), /* SI */
548 COSTS_N_INSNS (39), /* DI */
549 COSTS_N_INSNS (39)}, /* other */
550 COSTS_N_INSNS (1), /* cost of movsx */
551 COSTS_N_INSNS (1), /* cost of movzx */
552 8, /* "large" insn */
553 4, /* MOVE_RATIO */
554 1, /* cost for loading QImode using movzbl */
555 {1, 1, 1}, /* cost of loading integer registers
556 in QImode, HImode and SImode.
557 Relative to reg-reg move (2). */
558 {1, 1, 1}, /* cost of storing integer registers */
559 1, /* cost of reg,reg fld/fst */
560 {1, 1, 1}, /* cost of loading fp registers
561 in SFmode, DFmode and XFmode */
562 {4, 6, 6}, /* cost of storing fp registers
563 in SFmode, DFmode and XFmode */
565 1, /* cost of moving MMX register */
566 {1, 1}, /* cost of loading MMX registers
567 in SImode and DImode */
568 {1, 1}, /* cost of storing MMX registers
569 in SImode and DImode */
570 1, /* cost of moving SSE register */
571 {1, 1, 1}, /* cost of loading SSE registers
572 in SImode, DImode and TImode */
573 {1, 1, 1}, /* cost of storing SSE registers
574 in SImode, DImode and TImode */
575 1, /* MMX or SSE register to integer */
576 64, /* size of l1 cache. */
577 128, /* size of l2 cache. */
578 32, /* size of prefetch block */
579 1, /* number of parallel prefetches */
580 1, /* Branch cost */
581 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
582 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
583 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
584 COSTS_N_INSNS (1), /* cost of FABS instruction. */
585 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
586 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
587 geode_memcpy,
588 geode_memset,
589 1, /* scalar_stmt_cost. */
590 1, /* scalar load_cost. */
591 1, /* scalar_store_cost. */
592 1, /* vec_stmt_cost. */
593 1, /* vec_to_scalar_cost. */
594 1, /* scalar_to_vec_cost. */
595 1, /* vec_align_load_cost. */
596 2, /* vec_unalign_load_cost. */
597 1, /* vec_store_cost. */
598 3, /* cond_taken_branch_cost. */
599 1, /* cond_not_taken_branch_cost. */
600 };
602 static stringop_algs k6_memcpy[2] = {
603 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
604 DUMMY_STRINGOP_ALGS};
605 static stringop_algs k6_memset[2] = {
606 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
607 DUMMY_STRINGOP_ALGS};
608 static const
609 struct processor_costs k6_cost = {
610 COSTS_N_INSNS (1), /* cost of an add instruction */
611 COSTS_N_INSNS (2), /* cost of a lea instruction */
612 COSTS_N_INSNS (1), /* variable shift costs */
613 COSTS_N_INSNS (1), /* constant shift costs */
614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
615 COSTS_N_INSNS (3), /* HI */
616 COSTS_N_INSNS (3), /* SI */
617 COSTS_N_INSNS (3), /* DI */
618 COSTS_N_INSNS (3)}, /* other */
619 0, /* cost of multiply per each bit set */
620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
621 COSTS_N_INSNS (18), /* HI */
622 COSTS_N_INSNS (18), /* SI */
623 COSTS_N_INSNS (18), /* DI */
624 COSTS_N_INSNS (18)}, /* other */
625 COSTS_N_INSNS (2), /* cost of movsx */
626 COSTS_N_INSNS (2), /* cost of movzx */
627 8, /* "large" insn */
628 4, /* MOVE_RATIO */
629 3, /* cost for loading QImode using movzbl */
630 {4, 5, 4}, /* cost of loading integer registers
631 in QImode, HImode and SImode.
632 Relative to reg-reg move (2). */
633 {2, 3, 2}, /* cost of storing integer registers */
634 4, /* cost of reg,reg fld/fst */
635 {6, 6, 6}, /* cost of loading fp registers
636 in SFmode, DFmode and XFmode */
637 {4, 4, 4}, /* cost of storing fp registers
638 in SFmode, DFmode and XFmode */
639 2, /* cost of moving MMX register */
640 {2, 2}, /* cost of loading MMX registers
641 in SImode and DImode */
642 {2, 2}, /* cost of storing MMX registers
643 in SImode and DImode */
644 2, /* cost of moving SSE register */
645 {2, 2, 8}, /* cost of loading SSE registers
646 in SImode, DImode and TImode */
647 {2, 2, 8}, /* cost of storing SSE registers
648 in SImode, DImode and TImode */
649 6, /* MMX or SSE register to integer */
650 32, /* size of l1 cache. */
651 32, /* size of l2 cache. Some models
652 have integrated l2 cache, but
653 optimizing for k6 is not important
654 enough to worry about that. */
655 32, /* size of prefetch block */
656 1, /* number of parallel prefetches */
657 1, /* Branch cost */
658 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
659 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
660 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
661 COSTS_N_INSNS (2), /* cost of FABS instruction. */
662 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
663 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
664 k6_memcpy,
665 k6_memset,
666 1, /* scalar_stmt_cost. */
667 1, /* scalar load_cost. */
668 1, /* scalar_store_cost. */
669 1, /* vec_stmt_cost. */
670 1, /* vec_to_scalar_cost. */
671 1, /* scalar_to_vec_cost. */
672 1, /* vec_align_load_cost. */
673 2, /* vec_unalign_load_cost. */
674 1, /* vec_store_cost. */
675 3, /* cond_taken_branch_cost. */
676 1, /* cond_not_taken_branch_cost. */
677 };
679 /* For some reason, Athlon deals better with REP prefix (relative to loops)
680 compared to K8. Alignment becomes important after 8 bytes for memcpy and
681 128 bytes for memset. */
682 static stringop_algs athlon_memcpy[2] = {
683 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
684 DUMMY_STRINGOP_ALGS};
685 static stringop_algs athlon_memset[2] = {
686 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
687 DUMMY_STRINGOP_ALGS};
688 static const
689 struct processor_costs athlon_cost = {
690 COSTS_N_INSNS (1), /* cost of an add instruction */
691 COSTS_N_INSNS (2), /* cost of a lea instruction */
692 COSTS_N_INSNS (1), /* variable shift costs */
693 COSTS_N_INSNS (1), /* constant shift costs */
694 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
695 COSTS_N_INSNS (5), /* HI */
696 COSTS_N_INSNS (5), /* SI */
697 COSTS_N_INSNS (5), /* DI */
698 COSTS_N_INSNS (5)}, /* other */
699 0, /* cost of multiply per each bit set */
700 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
701 COSTS_N_INSNS (26), /* HI */
702 COSTS_N_INSNS (42), /* SI */
703 COSTS_N_INSNS (74), /* DI */
704 COSTS_N_INSNS (74)}, /* other */
705 COSTS_N_INSNS (1), /* cost of movsx */
706 COSTS_N_INSNS (1), /* cost of movzx */
707 8, /* "large" insn */
708 9, /* MOVE_RATIO */
709 4, /* cost for loading QImode using movzbl */
710 {3, 4, 3}, /* cost of loading integer registers
711 in QImode, HImode and SImode.
712 Relative to reg-reg move (2). */
713 {3, 4, 3}, /* cost of storing integer registers */
714 4, /* cost of reg,reg fld/fst */
715 {4, 4, 12}, /* cost of loading fp registers
716 in SFmode, DFmode and XFmode */
717 {6, 6, 8}, /* cost of storing fp registers
718 in SFmode, DFmode and XFmode */
719 2, /* cost of moving MMX register */
720 {4, 4}, /* cost of loading MMX registers
721 in SImode and DImode */
722 {4, 4}, /* cost of storing MMX registers
723 in SImode and DImode */
724 2, /* cost of moving SSE register */
725 {4, 4, 6}, /* cost of loading SSE registers
726 in SImode, DImode and TImode */
727 {4, 4, 5}, /* cost of storing SSE registers
728 in SImode, DImode and TImode */
729 5, /* MMX or SSE register to integer */
730 64, /* size of l1 cache. */
731 256, /* size of l2 cache. */
732 64, /* size of prefetch block */
733 6, /* number of parallel prefetches */
734 5, /* Branch cost */
735 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
736 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
737 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
738 COSTS_N_INSNS (2), /* cost of FABS instruction. */
739 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
740 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
741 athlon_memcpy,
742 athlon_memset,
743 1, /* scalar_stmt_cost. */
744 1, /* scalar load_cost. */
745 1, /* scalar_store_cost. */
746 1, /* vec_stmt_cost. */
747 1, /* vec_to_scalar_cost. */
748 1, /* scalar_to_vec_cost. */
749 1, /* vec_align_load_cost. */
750 2, /* vec_unalign_load_cost. */
751 1, /* vec_store_cost. */
752 3, /* cond_taken_branch_cost. */
753 1, /* cond_not_taken_branch_cost. */
754 };
756 /* K8 has optimized REP instruction for medium sized blocks, but for very
757 small blocks it is better to use loop. For large blocks, libcall can
758 do nontemporary accesses and beat inline considerably. */
759 static stringop_algs k8_memcpy[2] = {
760 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
761 {-1, rep_prefix_4_byte, false}}},
762 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
763 {-1, libcall, false}}}};
764 static stringop_algs k8_memset[2] = {
765 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
766 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
767 {libcall, {{48, unrolled_loop, false},
768 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
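/* For illustration: unlike the 32-bit-only processors above, whose 64-bit
   slot is only the DUMMY_STRINGOP_ALGS placeholder, K8 provides a real
   64-bit row, e.g. a 64-bit memcpy uses an inline loop up to 16 bytes,
   rep movsq up to 8 kB, and a libcall beyond that.  */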
769 static const
770 struct processor_costs k8_cost = {
771 COSTS_N_INSNS (1), /* cost of an add instruction */
772 COSTS_N_INSNS (2), /* cost of a lea instruction */
773 COSTS_N_INSNS (1), /* variable shift costs */
774 COSTS_N_INSNS (1), /* constant shift costs */
775 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
776 COSTS_N_INSNS (4), /* HI */
777 COSTS_N_INSNS (3), /* SI */
778 COSTS_N_INSNS (4), /* DI */
779 COSTS_N_INSNS (5)}, /* other */
780 0, /* cost of multiply per each bit set */
781 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
782 COSTS_N_INSNS (26), /* HI */
783 COSTS_N_INSNS (42), /* SI */
784 COSTS_N_INSNS (74), /* DI */
785 COSTS_N_INSNS (74)}, /* other */
786 COSTS_N_INSNS (1), /* cost of movsx */
787 COSTS_N_INSNS (1), /* cost of movzx */
788 8, /* "large" insn */
789 9, /* MOVE_RATIO */
790 4, /* cost for loading QImode using movzbl */
791 {3, 4, 3}, /* cost of loading integer registers
792 in QImode, HImode and SImode.
793 Relative to reg-reg move (2). */
794 {3, 4, 3}, /* cost of storing integer registers */
795 4, /* cost of reg,reg fld/fst */
796 {4, 4, 12}, /* cost of loading fp registers
797 in SFmode, DFmode and XFmode */
798 {6, 6, 8}, /* cost of storing fp registers
799 in SFmode, DFmode and XFmode */
800 2, /* cost of moving MMX register */
801 {3, 3}, /* cost of loading MMX registers
802 in SImode and DImode */
803 {4, 4}, /* cost of storing MMX registers
804 in SImode and DImode */
805 2, /* cost of moving SSE register */
806 {4, 3, 6}, /* cost of loading SSE registers
807 in SImode, DImode and TImode */
808 {4, 4, 5}, /* cost of storing SSE registers
809 in SImode, DImode and TImode */
810 5, /* MMX or SSE register to integer */
811 64, /* size of l1 cache. */
812 512, /* size of l2 cache. */
813 64, /* size of prefetch block */
814 /* New AMD processors never drop prefetches; if they cannot be performed
815 immediately, they are queued. We set number of simultaneous prefetches
816 to a large constant to reflect this (it probably is not a good idea not
817 to limit number of prefetches at all, as their execution also takes some
818 time). */
819 100, /* number of parallel prefetches */
820 3, /* Branch cost */
821 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
822 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
823 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
824 COSTS_N_INSNS (2), /* cost of FABS instruction. */
825 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
826 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
828 k8_memcpy,
829 k8_memset,
830 4, /* scalar_stmt_cost. */
831 2, /* scalar load_cost. */
832 2, /* scalar_store_cost. */
833 5, /* vec_stmt_cost. */
834 0, /* vec_to_scalar_cost. */
835 2, /* scalar_to_vec_cost. */
836 2, /* vec_align_load_cost. */
837 3, /* vec_unalign_load_cost. */
838 3, /* vec_store_cost. */
839 3, /* cond_taken_branch_cost. */
840 2, /* cond_not_taken_branch_cost. */
841 };
843 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
844 very small blocks it is better to use loop. For large blocks, libcall can
845 do nontemporary accesses and beat inline considerably. */
846 static stringop_algs amdfam10_memcpy[2] = {
847 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
848 {-1, rep_prefix_4_byte, false}}},
849 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
850 {-1, libcall, false}}}};
851 static stringop_algs amdfam10_memset[2] = {
852 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
853 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
854 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
855 {-1, libcall, false}}}};
856 struct processor_costs amdfam10_cost = {
857 COSTS_N_INSNS (1), /* cost of an add instruction */
858 COSTS_N_INSNS (2), /* cost of a lea instruction */
859 COSTS_N_INSNS (1), /* variable shift costs */
860 COSTS_N_INSNS (1), /* constant shift costs */
861 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
862 COSTS_N_INSNS (4), /* HI */
863 COSTS_N_INSNS (3), /* SI */
864 COSTS_N_INSNS (4), /* DI */
865 COSTS_N_INSNS (5)}, /* other */
866 0, /* cost of multiply per each bit set */
867 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
868 COSTS_N_INSNS (35), /* HI */
869 COSTS_N_INSNS (51), /* SI */
870 COSTS_N_INSNS (83), /* DI */
871 COSTS_N_INSNS (83)}, /* other */
872 COSTS_N_INSNS (1), /* cost of movsx */
873 COSTS_N_INSNS (1), /* cost of movzx */
874 8, /* "large" insn */
875 9, /* MOVE_RATIO */
876 4, /* cost for loading QImode using movzbl */
877 {3, 4, 3}, /* cost of loading integer registers
878 in QImode, HImode and SImode.
879 Relative to reg-reg move (2). */
880 {3, 4, 3}, /* cost of storing integer registers */
881 4, /* cost of reg,reg fld/fst */
882 {4, 4, 12}, /* cost of loading fp registers
883 in SFmode, DFmode and XFmode */
884 {6, 6, 8}, /* cost of storing fp registers
885 in SFmode, DFmode and XFmode */
886 2, /* cost of moving MMX register */
887 {3, 3}, /* cost of loading MMX registers
888 in SImode and DImode */
889 {4, 4}, /* cost of storing MMX registers
890 in SImode and DImode */
891 2, /* cost of moving SSE register */
892 {4, 4, 3}, /* cost of loading SSE registers
893 in SImode, DImode and TImode */
894 {4, 4, 5}, /* cost of storing SSE registers
895 in SImode, DImode and TImode */
896 3, /* MMX or SSE register to integer */
897 /* On K8:
898 MOVD reg64, xmmreg Double FSTORE 4
899 MOVD reg32, xmmreg Double FSTORE 4
900 On AMDFAM10:
901 MOVD reg64, xmmreg Double FADD 3
902 1/1 1/1
903 MOVD reg32, xmmreg Double FADD 3
904 1/1 1/1 */
905 64, /* size of l1 cache. */
906 512, /* size of l2 cache. */
907 64, /* size of prefetch block */
908 /* New AMD processors never drop prefetches; if they cannot be performed
909 immediately, they are queued. We set number of simultaneous prefetches
910 to a large constant to reflect this (it probably is not a good idea not
911 to limit number of prefetches at all, as their execution also takes some
912 time). */
913 100, /* number of parallel prefetches */
914 2, /* Branch cost */
915 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
916 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
917 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
918 COSTS_N_INSNS (2), /* cost of FABS instruction. */
919 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
920 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
922 amdfam10_memcpy,
923 amdfam10_memset,
924 4, /* scalar_stmt_cost. */
925 2, /* scalar load_cost. */
926 2, /* scalar_store_cost. */
927 6, /* vec_stmt_cost. */
928 0, /* vec_to_scalar_cost. */
929 2, /* scalar_to_vec_cost. */
930 2, /* vec_align_load_cost. */
931 2, /* vec_unalign_load_cost. */
932 2, /* vec_store_cost. */
933 2, /* cond_taken_branch_cost. */
934 1, /* cond_not_taken_branch_cost. */
935 };
937 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
938 very small blocks it is better to use loop. For large blocks, libcall
939 can do nontemporary accesses and beat inline considerably. */
940 static stringop_algs bdver1_memcpy[2] = {
941 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
942 {-1, rep_prefix_4_byte, false}}},
943 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
944 {-1, libcall, false}}}};
945 static stringop_algs bdver1_memset[2] = {
946 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
947 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
948 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
949 {-1, libcall, false}}}};
951 const struct processor_costs bdver1_cost = {
952 COSTS_N_INSNS (1), /* cost of an add instruction */
953 COSTS_N_INSNS (1), /* cost of a lea instruction */
954 COSTS_N_INSNS (1), /* variable shift costs */
955 COSTS_N_INSNS (1), /* constant shift costs */
956 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
957 COSTS_N_INSNS (4), /* HI */
958 COSTS_N_INSNS (4), /* SI */
959 COSTS_N_INSNS (6), /* DI */
960 COSTS_N_INSNS (6)}, /* other */
961 0, /* cost of multiply per each bit set */
962 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
963 COSTS_N_INSNS (35), /* HI */
964 COSTS_N_INSNS (51), /* SI */
965 COSTS_N_INSNS (83), /* DI */
966 COSTS_N_INSNS (83)}, /* other */
967 COSTS_N_INSNS (1), /* cost of movsx */
968 COSTS_N_INSNS (1), /* cost of movzx */
969 8, /* "large" insn */
970 9, /* MOVE_RATIO */
971 4, /* cost for loading QImode using movzbl */
972 {5, 5, 4}, /* cost of loading integer registers
973 in QImode, HImode and SImode.
974 Relative to reg-reg move (2). */
975 {4, 4, 4}, /* cost of storing integer registers */
976 2, /* cost of reg,reg fld/fst */
977 {5, 5, 12}, /* cost of loading fp registers
978 in SFmode, DFmode and XFmode */
979 {4, 4, 8}, /* cost of storing fp registers
980 in SFmode, DFmode and XFmode */
981 2, /* cost of moving MMX register */
982 {4, 4}, /* cost of loading MMX registers
983 in SImode and DImode */
984 {4, 4}, /* cost of storing MMX registers
985 in SImode and DImode */
986 2, /* cost of moving SSE register */
987 {4, 4, 4}, /* cost of loading SSE registers
988 in SImode, DImode and TImode */
989 {4, 4, 4}, /* cost of storing SSE registers
990 in SImode, DImode and TImode */
991 2, /* MMX or SSE register to integer */
992 /* On K8:
993 MOVD reg64, xmmreg Double FSTORE 4
994 MOVD reg32, xmmreg Double FSTORE 4
995 On AMDFAM10:
996 MOVD reg64, xmmreg Double FADD 3
997 1/1 1/1
998 MOVD reg32, xmmreg Double FADD 3
999 1/1 1/1 */
1000 16, /* size of l1 cache. */
1001 2048, /* size of l2 cache. */
1002 64, /* size of prefetch block */
1003 /* New AMD processors never drop prefetches; if they cannot be performed
1004 immediately, they are queued. We set number of simultaneous prefetches
1005 to a large constant to reflect this (it probably is not a good idea not
1006 to limit number of prefetches at all, as their execution also takes some
1007 time). */
1008 100, /* number of parallel prefetches */
1009 2, /* Branch cost */
1010 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1011 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1012 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1013 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1014 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1015 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1017 bdver1_memcpy,
1018 bdver1_memset,
1019 6, /* scalar_stmt_cost. */
1020 4, /* scalar load_cost. */
1021 4, /* scalar_store_cost. */
1022 6, /* vec_stmt_cost. */
1023 0, /* vec_to_scalar_cost. */
1024 2, /* scalar_to_vec_cost. */
1025 4, /* vec_align_load_cost. */
1026 4, /* vec_unalign_load_cost. */
1027 4, /* vec_store_cost. */
1028 4, /* cond_taken_branch_cost. */
1029 2, /* cond_not_taken_branch_cost. */
1030 };
1032 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1033 very small blocks it is better to use loop. For large blocks, libcall
1034 can do nontemporary accesses and beat inline considerably. */
1036 static stringop_algs bdver2_memcpy[2] = {
1037 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1038 {-1, rep_prefix_4_byte, false}}},
1039 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1040 {-1, libcall, false}}}};
1041 static stringop_algs bdver2_memset[2] = {
1042 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1043 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1044 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1045 {-1, libcall, false}}}};
1047 const struct processor_costs bdver2_cost = {
1048 COSTS_N_INSNS (1), /* cost of an add instruction */
1049 COSTS_N_INSNS (1), /* cost of a lea instruction */
1050 COSTS_N_INSNS (1), /* variable shift costs */
1051 COSTS_N_INSNS (1), /* constant shift costs */
1052 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1053 COSTS_N_INSNS (4), /* HI */
1054 COSTS_N_INSNS (4), /* SI */
1055 COSTS_N_INSNS (6), /* DI */
1056 COSTS_N_INSNS (6)}, /* other */
1057 0, /* cost of multiply per each bit set */
1058 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1059 COSTS_N_INSNS (35), /* HI */
1060 COSTS_N_INSNS (51), /* SI */
1061 COSTS_N_INSNS (83), /* DI */
1062 COSTS_N_INSNS (83)}, /* other */
1063 COSTS_N_INSNS (1), /* cost of movsx */
1064 COSTS_N_INSNS (1), /* cost of movzx */
1065 8, /* "large" insn */
1066 9, /* MOVE_RATIO */
1067 4, /* cost for loading QImode using movzbl */
1068 {5, 5, 4}, /* cost of loading integer registers
1069 in QImode, HImode and SImode.
1070 Relative to reg-reg move (2). */
1071 {4, 4, 4}, /* cost of storing integer registers */
1072 2, /* cost of reg,reg fld/fst */
1073 {5, 5, 12}, /* cost of loading fp registers
1074 in SFmode, DFmode and XFmode */
1075 {4, 4, 8}, /* cost of storing fp registers
1076 in SFmode, DFmode and XFmode */
1077 2, /* cost of moving MMX register */
1078 {4, 4}, /* cost of loading MMX registers
1079 in SImode and DImode */
1080 {4, 4}, /* cost of storing MMX registers
1081 in SImode and DImode */
1082 2, /* cost of moving SSE register */
1083 {4, 4, 4}, /* cost of loading SSE registers
1084 in SImode, DImode and TImode */
1085 {4, 4, 4}, /* cost of storing SSE registers
1086 in SImode, DImode and TImode */
1087 2, /* MMX or SSE register to integer */
1088 /* On K8:
1089 MOVD reg64, xmmreg Double FSTORE 4
1090 MOVD reg32, xmmreg Double FSTORE 4
1091 On AMDFAM10:
1092 MOVD reg64, xmmreg Double FADD 3
1093 1/1 1/1
1094 MOVD reg32, xmmreg Double FADD 3
1095 1/1 1/1 */
1096 16, /* size of l1 cache. */
1097 2048, /* size of l2 cache. */
1098 64, /* size of prefetch block */
1099 /* New AMD processors never drop prefetches; if they cannot be performed
1100 immediately, they are queued. We set number of simultaneous prefetches
1101 to a large constant to reflect this (it probably is not a good idea not
1102 to limit number of prefetches at all, as their execution also takes some
1103 time). */
1104 100, /* number of parallel prefetches */
1105 2, /* Branch cost */
1106 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1107 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1108 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1109 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1110 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1111 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1113 bdver2_memcpy,
1114 bdver2_memset,
1115 6, /* scalar_stmt_cost. */
1116 4, /* scalar load_cost. */
1117 4, /* scalar_store_cost. */
1118 6, /* vec_stmt_cost. */
1119 0, /* vec_to_scalar_cost. */
1120 2, /* scalar_to_vec_cost. */
1121 4, /* vec_align_load_cost. */
1122 4, /* vec_unalign_load_cost. */
1123 4, /* vec_store_cost. */
1124 4, /* cond_taken_branch_cost. */
1125 2, /* cond_not_taken_branch_cost. */
1126 };
1129 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1130 very small blocks it is better to use loop. For large blocks, libcall
1131 can do nontemporary accesses and beat inline considerably. */
1132 static stringop_algs bdver3_memcpy[2] = {
1133 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1134 {-1, rep_prefix_4_byte, false}}},
1135 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1136 {-1, libcall, false}}}};
1137 static stringop_algs bdver3_memset[2] = {
1138 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1139 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1140 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1141 {-1, libcall, false}}}};
1142 struct processor_costs bdver3_cost = {
1143 COSTS_N_INSNS (1), /* cost of an add instruction */
1144 COSTS_N_INSNS (1), /* cost of a lea instruction */
1145 COSTS_N_INSNS (1), /* variable shift costs */
1146 COSTS_N_INSNS (1), /* constant shift costs */
1147 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1148 COSTS_N_INSNS (4), /* HI */
1149 COSTS_N_INSNS (4), /* SI */
1150 COSTS_N_INSNS (6), /* DI */
1151 COSTS_N_INSNS (6)}, /* other */
1152 0, /* cost of multiply per each bit set */
1153 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1154 COSTS_N_INSNS (35), /* HI */
1155 COSTS_N_INSNS (51), /* SI */
1156 COSTS_N_INSNS (83), /* DI */
1157 COSTS_N_INSNS (83)}, /* other */
1158 COSTS_N_INSNS (1), /* cost of movsx */
1159 COSTS_N_INSNS (1), /* cost of movzx */
1160 8, /* "large" insn */
1161 9, /* MOVE_RATIO */
1162 4, /* cost for loading QImode using movzbl */
1163 {5, 5, 4}, /* cost of loading integer registers
1164 in QImode, HImode and SImode.
1165 Relative to reg-reg move (2). */
1166 {4, 4, 4}, /* cost of storing integer registers */
1167 2, /* cost of reg,reg fld/fst */
1168 {5, 5, 12}, /* cost of loading fp registers
1169 in SFmode, DFmode and XFmode */
1170 {4, 4, 8}, /* cost of storing fp registers
1171 in SFmode, DFmode and XFmode */
1172 2, /* cost of moving MMX register */
1173 {4, 4}, /* cost of loading MMX registers
1174 in SImode and DImode */
1175 {4, 4}, /* cost of storing MMX registers
1176 in SImode and DImode */
1177 2, /* cost of moving SSE register */
1178 {4, 4, 4}, /* cost of loading SSE registers
1179 in SImode, DImode and TImode */
1180 {4, 4, 4}, /* cost of storing SSE registers
1181 in SImode, DImode and TImode */
1182 2, /* MMX or SSE register to integer */
1183 16, /* size of l1 cache. */
1184 2048, /* size of l2 cache. */
1185 64, /* size of prefetch block */
1186 /* New AMD processors never drop prefetches; if they cannot be performed
1187 immediately, they are queued. We set number of simultaneous prefetches
1188 to a large constant to reflect this (it probably is not a good idea not
1189 to limit number of prefetches at all, as their execution also takes some
1190 time). */
1191 100, /* number of parallel prefetches */
1192 2, /* Branch cost */
1193 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1194 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1195 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1196 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1197 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1198 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1200 bdver3_memcpy,
1201 bdver3_memset,
1202 6, /* scalar_stmt_cost. */
1203 4, /* scalar load_cost. */
1204 4, /* scalar_store_cost. */
1205 6, /* vec_stmt_cost. */
1206 0, /* vec_to_scalar_cost. */
1207 2, /* scalar_to_vec_cost. */
1208 4, /* vec_align_load_cost. */
1209 4, /* vec_unalign_load_cost. */
1210 4, /* vec_store_cost. */
1211 4, /* cond_taken_branch_cost. */
1212 2, /* cond_not_taken_branch_cost. */
1213 };
1215 /* BDVER4 has optimized REP instruction for medium sized blocks, but for
1216 very small blocks it is better to use loop. For large blocks, libcall
1217 can do nontemporary accesses and beat inline considerably. */
1218 static stringop_algs bdver4_memcpy[2] = {
1219 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1220 {-1, rep_prefix_4_byte, false}}},
1221 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1222 {-1, libcall, false}}}};
1223 static stringop_algs bdver4_memset[2] = {
1224 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1225 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1226 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1227 {-1, libcall, false}}}};
1228 struct processor_costs bdver4_cost = {
1229 COSTS_N_INSNS (1), /* cost of an add instruction */
1230 COSTS_N_INSNS (1), /* cost of a lea instruction */
1231 COSTS_N_INSNS (1), /* variable shift costs */
1232 COSTS_N_INSNS (1), /* constant shift costs */
1233 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1234 COSTS_N_INSNS (4), /* HI */
1235 COSTS_N_INSNS (4), /* SI */
1236 COSTS_N_INSNS (6), /* DI */
1237 COSTS_N_INSNS (6)}, /* other */
1238 0, /* cost of multiply per each bit set */
1239 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1240 COSTS_N_INSNS (35), /* HI */
1241 COSTS_N_INSNS (51), /* SI */
1242 COSTS_N_INSNS (83), /* DI */
1243 COSTS_N_INSNS (83)}, /* other */
1244 COSTS_N_INSNS (1), /* cost of movsx */
1245 COSTS_N_INSNS (1), /* cost of movzx */
1246 8, /* "large" insn */
1247 9, /* MOVE_RATIO */
1248 4, /* cost for loading QImode using movzbl */
1249 {5, 5, 4}, /* cost of loading integer registers
1250 in QImode, HImode and SImode.
1251 Relative to reg-reg move (2). */
1252 {4, 4, 4}, /* cost of storing integer registers */
1253 2, /* cost of reg,reg fld/fst */
1254 {5, 5, 12}, /* cost of loading fp registers
1255 in SFmode, DFmode and XFmode */
1256 {4, 4, 8}, /* cost of storing fp registers
1257 in SFmode, DFmode and XFmode */
1258 2, /* cost of moving MMX register */
1259 {4, 4}, /* cost of loading MMX registers
1260 in SImode and DImode */
1261 {4, 4}, /* cost of storing MMX registers
1262 in SImode and DImode */
1263 2, /* cost of moving SSE register */
1264 {4, 4, 4}, /* cost of loading SSE registers
1265 in SImode, DImode and TImode */
1266 {4, 4, 4}, /* cost of storing SSE registers
1267 in SImode, DImode and TImode */
1268 2, /* MMX or SSE register to integer */
1269 16, /* size of l1 cache. */
1270 2048, /* size of l2 cache. */
1271 64, /* size of prefetch block */
1272 /* New AMD processors never drop prefetches; if they cannot be performed
1273 immediately, they are queued. We set number of simultaneous prefetches
1274 to a large constant to reflect this (it probably is not a good idea not
1275 to limit number of prefetches at all, as their execution also takes some
1276 time). */
1277 100, /* number of parallel prefetches */
1278 2, /* Branch cost */
1279 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1280 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1281 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1282 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1283 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1284 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1286 bdver4_memcpy,
1287 bdver4_memset,
1288 6, /* scalar_stmt_cost. */
1289 4, /* scalar load_cost. */
1290 4, /* scalar_store_cost. */
1291 6, /* vec_stmt_cost. */
1292 0, /* vec_to_scalar_cost. */
1293 2, /* scalar_to_vec_cost. */
1294 4, /* vec_align_load_cost. */
1295 4, /* vec_unalign_load_cost. */
1296 4, /* vec_store_cost. */
1297 4, /* cond_taken_branch_cost. */
1298 2, /* cond_not_taken_branch_cost. */
1299 };
1301 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1302 very small blocks it is better to use loop. For large blocks, libcall can
1303 do nontemporary accesses and beat inline considerably. */
1304 static stringop_algs btver1_memcpy[2] = {
1305 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1306 {-1, rep_prefix_4_byte, false}}},
1307 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1308 {-1, libcall, false}}}};
1309 static stringop_algs btver1_memset[2] = {
1310 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1311 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1312 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1313 {-1, libcall, false}}}};
1314 const struct processor_costs btver1_cost = {
1315 COSTS_N_INSNS (1), /* cost of an add instruction */
1316 COSTS_N_INSNS (2), /* cost of a lea instruction */
1317 COSTS_N_INSNS (1), /* variable shift costs */
1318 COSTS_N_INSNS (1), /* constant shift costs */
1319 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1320 COSTS_N_INSNS (4), /* HI */
1321 COSTS_N_INSNS (3), /* SI */
1322 COSTS_N_INSNS (4), /* DI */
1323 COSTS_N_INSNS (5)}, /* other */
1324 0, /* cost of multiply per each bit set */
1325 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1326 COSTS_N_INSNS (35), /* HI */
1327 COSTS_N_INSNS (51), /* SI */
1328 COSTS_N_INSNS (83), /* DI */
1329 COSTS_N_INSNS (83)}, /* other */
1330 COSTS_N_INSNS (1), /* cost of movsx */
1331 COSTS_N_INSNS (1), /* cost of movzx */
1332 8, /* "large" insn */
1333 9, /* MOVE_RATIO */
1334 4, /* cost for loading QImode using movzbl */
1335 {3, 4, 3}, /* cost of loading integer registers
1336 in QImode, HImode and SImode.
1337 Relative to reg-reg move (2). */
1338 {3, 4, 3}, /* cost of storing integer registers */
1339 4, /* cost of reg,reg fld/fst */
1340 {4, 4, 12}, /* cost of loading fp registers
1341 in SFmode, DFmode and XFmode */
1342 {6, 6, 8}, /* cost of storing fp registers
1343 in SFmode, DFmode and XFmode */
1344 2, /* cost of moving MMX register */
1345 {3, 3}, /* cost of loading MMX registers
1346 in SImode and DImode */
1347 {4, 4}, /* cost of storing MMX registers
1348 in SImode and DImode */
1349 2, /* cost of moving SSE register */
1350 {4, 4, 3}, /* cost of loading SSE registers
1351 in SImode, DImode and TImode */
1352 {4, 4, 5}, /* cost of storing SSE registers
1353 in SImode, DImode and TImode */
1354 3, /* MMX or SSE register to integer */
1355 /* On K8:
1356 MOVD reg64, xmmreg Double FSTORE 4
1357 MOVD reg32, xmmreg Double FSTORE 4
1358 On AMDFAM10:
1359 MOVD reg64, xmmreg Double FADD 3
1360 1/1 1/1
1361 MOVD reg32, xmmreg Double FADD 3
1362 1/1 1/1 */
1363 32, /* size of l1 cache. */
1364 512, /* size of l2 cache. */
1365 64, /* size of prefetch block */
1366 100, /* number of parallel prefetches */
1367 2, /* Branch cost */
1368 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1369 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1370 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1371 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1372 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1373 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1375 btver1_memcpy,
1376 btver1_memset,
1377 4, /* scalar_stmt_cost. */
1378 2, /* scalar load_cost. */
1379 2, /* scalar_store_cost. */
1380 6, /* vec_stmt_cost. */
1381 0, /* vec_to_scalar_cost. */
1382 2, /* scalar_to_vec_cost. */
1383 2, /* vec_align_load_cost. */
1384 2, /* vec_unalign_load_cost. */
1385 2, /* vec_store_cost. */
1386 2, /* cond_taken_branch_cost. */
1387 1, /* cond_not_taken_branch_cost. */
1388 };
1390 static stringop_algs btver2_memcpy[2] = {
1391 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1392 {-1, rep_prefix_4_byte, false}}},
1393 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1394 {-1, libcall, false}}}};
1395 static stringop_algs btver2_memset[2] = {
1396 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1397 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1398 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1399 {-1, libcall, false}}}};
1400 const struct processor_costs btver2_cost = {
1401 COSTS_N_INSNS (1), /* cost of an add instruction */
1402 COSTS_N_INSNS (2), /* cost of a lea instruction */
1403 COSTS_N_INSNS (1), /* variable shift costs */
1404 COSTS_N_INSNS (1), /* constant shift costs */
1405 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1406 COSTS_N_INSNS (4), /* HI */
1407 COSTS_N_INSNS (3), /* SI */
1408 COSTS_N_INSNS (4), /* DI */
1409 COSTS_N_INSNS (5)}, /* other */
1410 0, /* cost of multiply per each bit set */
1411 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1412 COSTS_N_INSNS (35), /* HI */
1413 COSTS_N_INSNS (51), /* SI */
1414 COSTS_N_INSNS (83), /* DI */
1415 COSTS_N_INSNS (83)}, /* other */
1416 COSTS_N_INSNS (1), /* cost of movsx */
1417 COSTS_N_INSNS (1), /* cost of movzx */
1418 8, /* "large" insn */
1419 9, /* MOVE_RATIO */
1420 4, /* cost for loading QImode using movzbl */
1421 {3, 4, 3}, /* cost of loading integer registers
1422 in QImode, HImode and SImode.
1423 Relative to reg-reg move (2). */
1424 {3, 4, 3}, /* cost of storing integer registers */
1425 4, /* cost of reg,reg fld/fst */
1426 {4, 4, 12}, /* cost of loading fp registers
1427 in SFmode, DFmode and XFmode */
1428 {6, 6, 8}, /* cost of storing fp registers
1429 in SFmode, DFmode and XFmode */
1430 2, /* cost of moving MMX register */
1431 {3, 3}, /* cost of loading MMX registers
1432 in SImode and DImode */
1433 {4, 4}, /* cost of storing MMX registers
1434 in SImode and DImode */
1435 2, /* cost of moving SSE register */
1436 {4, 4, 3}, /* cost of loading SSE registers
1437 in SImode, DImode and TImode */
1438 {4, 4, 5}, /* cost of storing SSE registers
1439 in SImode, DImode and TImode */
1440 3, /* MMX or SSE register to integer */
1441 /* On K8:
1442 MOVD reg64, xmmreg Double FSTORE 4
1443 MOVD reg32, xmmreg Double FSTORE 4
1444 On AMDFAM10:
1445 MOVD reg64, xmmreg Double FADD 3
1446 1/1 1/1
1447 MOVD reg32, xmmreg Double FADD 3
1448 1/1 1/1 */
1449 32, /* size of l1 cache. */
1450 2048, /* size of l2 cache. */
1451 64, /* size of prefetch block */
1452 100, /* number of parallel prefetches */
1453 2, /* Branch cost */
1454 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1455 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1456 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1457 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1458 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1459 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1460 btver2_memcpy,
1461 btver2_memset,
1462 4, /* scalar_stmt_cost. */
1463 2, /* scalar_load_cost. */
1464 2, /* scalar_store_cost. */
1465 6, /* vec_stmt_cost. */
1466 0, /* vec_to_scalar_cost. */
1467 2, /* scalar_to_vec_cost. */
1468 2, /* vec_align_load_cost. */
1469 2, /* vec_unalign_load_cost. */
1470 2, /* vec_store_cost. */
1471 2, /* cond_taken_branch_cost. */
1472 1, /* cond_not_taken_branch_cost. */
1473 };
1475 static stringop_algs pentium4_memcpy[2] = {
1476 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1477 DUMMY_STRINGOP_ALGS};
1478 static stringop_algs pentium4_memset[2] = {
1479 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1480 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1481 DUMMY_STRINGOP_ALGS};
1483 static const
1484 struct processor_costs pentium4_cost = {
1485 COSTS_N_INSNS (1), /* cost of an add instruction */
1486 COSTS_N_INSNS (3), /* cost of a lea instruction */
1487 COSTS_N_INSNS (4), /* variable shift costs */
1488 COSTS_N_INSNS (4), /* constant shift costs */
1489 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1490 COSTS_N_INSNS (15), /* HI */
1491 COSTS_N_INSNS (15), /* SI */
1492 COSTS_N_INSNS (15), /* DI */
1493 COSTS_N_INSNS (15)}, /* other */
1494 0, /* cost of multiply per each bit set */
1495 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1496 COSTS_N_INSNS (56), /* HI */
1497 COSTS_N_INSNS (56), /* SI */
1498 COSTS_N_INSNS (56), /* DI */
1499 COSTS_N_INSNS (56)}, /* other */
1500 COSTS_N_INSNS (1), /* cost of movsx */
1501 COSTS_N_INSNS (1), /* cost of movzx */
1502 16, /* "large" insn */
1503 6, /* MOVE_RATIO */
1504 2, /* cost for loading QImode using movzbl */
1505 {4, 5, 4}, /* cost of loading integer registers
1506 in QImode, HImode and SImode.
1507 Relative to reg-reg move (2). */
1508 {2, 3, 2}, /* cost of storing integer registers */
1509 2, /* cost of reg,reg fld/fst */
1510 {2, 2, 6}, /* cost of loading fp registers
1511 in SFmode, DFmode and XFmode */
1512 {4, 4, 6}, /* cost of storing fp registers
1513 in SFmode, DFmode and XFmode */
1514 2, /* cost of moving MMX register */
1515 {2, 2}, /* cost of loading MMX registers
1516 in SImode and DImode */
1517 {2, 2}, /* cost of storing MMX registers
1518 in SImode and DImode */
1519 12, /* cost of moving SSE register */
1520 {12, 12, 12}, /* cost of loading SSE registers
1521 in SImode, DImode and TImode */
1522 {2, 2, 8}, /* cost of storing SSE registers
1523 in SImode, DImode and TImode */
1524 10, /* MMX or SSE register to integer */
1525 8, /* size of l1 cache. */
1526 256, /* size of l2 cache. */
1527 64, /* size of prefetch block */
1528 6, /* number of parallel prefetches */
1529 2, /* Branch cost */
1530 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1531 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1532 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1533 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1534 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1535 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1536 pentium4_memcpy,
1537 pentium4_memset,
1538 1, /* scalar_stmt_cost. */
1539 1, /* scalar_load_cost. */
1540 1, /* scalar_store_cost. */
1541 1, /* vec_stmt_cost. */
1542 1, /* vec_to_scalar_cost. */
1543 1, /* scalar_to_vec_cost. */
1544 1, /* vec_align_load_cost. */
1545 2, /* vec_unalign_load_cost. */
1546 1, /* vec_store_cost. */
1547 3, /* cond_taken_branch_cost. */
1548 1, /* cond_not_taken_branch_cost. */
1549 };
1551 static stringop_algs nocona_memcpy[2] = {
1552 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1553 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1554 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1556 static stringop_algs nocona_memset[2] = {
1557 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1558 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1559 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1560 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1562 static const
1563 struct processor_costs nocona_cost = {
1564 COSTS_N_INSNS (1), /* cost of an add instruction */
1565 COSTS_N_INSNS (1), /* cost of a lea instruction */
1566 COSTS_N_INSNS (1), /* variable shift costs */
1567 COSTS_N_INSNS (1), /* constant shift costs */
1568 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1569 COSTS_N_INSNS (10), /* HI */
1570 COSTS_N_INSNS (10), /* SI */
1571 COSTS_N_INSNS (10), /* DI */
1572 COSTS_N_INSNS (10)}, /* other */
1573 0, /* cost of multiply per each bit set */
1574 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1575 COSTS_N_INSNS (66), /* HI */
1576 COSTS_N_INSNS (66), /* SI */
1577 COSTS_N_INSNS (66), /* DI */
1578 COSTS_N_INSNS (66)}, /* other */
1579 COSTS_N_INSNS (1), /* cost of movsx */
1580 COSTS_N_INSNS (1), /* cost of movzx */
1581 16, /* "large" insn */
1582 17, /* MOVE_RATIO */
1583 4, /* cost for loading QImode using movzbl */
1584 {4, 4, 4}, /* cost of loading integer registers
1585 in QImode, HImode and SImode.
1586 Relative to reg-reg move (2). */
1587 {4, 4, 4}, /* cost of storing integer registers */
1588 3, /* cost of reg,reg fld/fst */
1589 {12, 12, 12}, /* cost of loading fp registers
1590 in SFmode, DFmode and XFmode */
1591 {4, 4, 4}, /* cost of storing fp registers
1592 in SFmode, DFmode and XFmode */
1593 6, /* cost of moving MMX register */
1594 {12, 12}, /* cost of loading MMX registers
1595 in SImode and DImode */
1596 {12, 12}, /* cost of storing MMX registers
1597 in SImode and DImode */
1598 6, /* cost of moving SSE register */
1599 {12, 12, 12}, /* cost of loading SSE registers
1600 in SImode, DImode and TImode */
1601 {12, 12, 12}, /* cost of storing SSE registers
1602 in SImode, DImode and TImode */
1603 8, /* MMX or SSE register to integer */
1604 8, /* size of l1 cache. */
1605 1024, /* size of l2 cache. */
1606 64, /* size of prefetch block */
1607 8, /* number of parallel prefetches */
1608 1, /* Branch cost */
1609 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1610 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1611 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1612 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1613 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1614 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1615 nocona_memcpy,
1616 nocona_memset,
1617 1, /* scalar_stmt_cost. */
1618 1, /* scalar_load_cost. */
1619 1, /* scalar_store_cost. */
1620 1, /* vec_stmt_cost. */
1621 1, /* vec_to_scalar_cost. */
1622 1, /* scalar_to_vec_cost. */
1623 1, /* vec_align_load_cost. */
1624 2, /* vec_unalign_load_cost. */
1625 1, /* vec_store_cost. */
1626 3, /* cond_taken_branch_cost. */
1627 1, /* cond_not_taken_branch_cost. */
1628 };
1630 static stringop_algs atom_memcpy[2] = {
1631 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1632 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1633 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1634 static stringop_algs atom_memset[2] = {
1635 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1636 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1637 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1638 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1639 static const
1640 struct processor_costs atom_cost = {
1641 COSTS_N_INSNS (1), /* cost of an add instruction */
1642 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1643 COSTS_N_INSNS (1), /* variable shift costs */
1644 COSTS_N_INSNS (1), /* constant shift costs */
1645 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1646 COSTS_N_INSNS (4), /* HI */
1647 COSTS_N_INSNS (3), /* SI */
1648 COSTS_N_INSNS (4), /* DI */
1649 COSTS_N_INSNS (2)}, /* other */
1650 0, /* cost of multiply per each bit set */
1651 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1652 COSTS_N_INSNS (26), /* HI */
1653 COSTS_N_INSNS (42), /* SI */
1654 COSTS_N_INSNS (74), /* DI */
1655 COSTS_N_INSNS (74)}, /* other */
1656 COSTS_N_INSNS (1), /* cost of movsx */
1657 COSTS_N_INSNS (1), /* cost of movzx */
1658 8, /* "large" insn */
1659 17, /* MOVE_RATIO */
1660 4, /* cost for loading QImode using movzbl */
1661 {4, 4, 4}, /* cost of loading integer registers
1662 in QImode, HImode and SImode.
1663 Relative to reg-reg move (2). */
1664 {4, 4, 4}, /* cost of storing integer registers */
1665 4, /* cost of reg,reg fld/fst */
1666 {12, 12, 12}, /* cost of loading fp registers
1667 in SFmode, DFmode and XFmode */
1668 {6, 6, 8}, /* cost of storing fp registers
1669 in SFmode, DFmode and XFmode */
1670 2, /* cost of moving MMX register */
1671 {8, 8}, /* cost of loading MMX registers
1672 in SImode and DImode */
1673 {8, 8}, /* cost of storing MMX registers
1674 in SImode and DImode */
1675 2, /* cost of moving SSE register */
1676 {8, 8, 8}, /* cost of loading SSE registers
1677 in SImode, DImode and TImode */
1678 {8, 8, 8}, /* cost of storing SSE registers
1679 in SImode, DImode and TImode */
1680 5, /* MMX or SSE register to integer */
1681 32, /* size of l1 cache. */
1682 256, /* size of l2 cache. */
1683 64, /* size of prefetch block */
1684 6, /* number of parallel prefetches */
1685 3, /* Branch cost */
1686 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1687 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1688 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1689 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1690 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1691 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1692 atom_memcpy,
1693 atom_memset,
1694 1, /* scalar_stmt_cost. */
1695 1, /* scalar_load_cost. */
1696 1, /* scalar_store_cost. */
1697 1, /* vec_stmt_cost. */
1698 1, /* vec_to_scalar_cost. */
1699 1, /* scalar_to_vec_cost. */
1700 1, /* vec_align_load_cost. */
1701 2, /* vec_unalign_load_cost. */
1702 1, /* vec_store_cost. */
1703 3, /* cond_taken_branch_cost. */
1704 1, /* cond_not_taken_branch_cost. */
1705 };
1707 static stringop_algs slm_memcpy[2] = {
1708 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1709 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1710 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1711 static stringop_algs slm_memset[2] = {
1712 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1713 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1714 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1715 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1716 static const
1717 struct processor_costs slm_cost = {
1718 COSTS_N_INSNS (1), /* cost of an add instruction */
1719 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1720 COSTS_N_INSNS (1), /* variable shift costs */
1721 COSTS_N_INSNS (1), /* constant shift costs */
1722 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1723 COSTS_N_INSNS (3), /* HI */
1724 COSTS_N_INSNS (3), /* SI */
1725 COSTS_N_INSNS (4), /* DI */
1726 COSTS_N_INSNS (2)}, /* other */
1727 0, /* cost of multiply per each bit set */
1728 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1729 COSTS_N_INSNS (26), /* HI */
1730 COSTS_N_INSNS (42), /* SI */
1731 COSTS_N_INSNS (74), /* DI */
1732 COSTS_N_INSNS (74)}, /* other */
1733 COSTS_N_INSNS (1), /* cost of movsx */
1734 COSTS_N_INSNS (1), /* cost of movzx */
1735 8, /* "large" insn */
1736 17, /* MOVE_RATIO */
1737 4, /* cost for loading QImode using movzbl */
1738 {4, 4, 4}, /* cost of loading integer registers
1739 in QImode, HImode and SImode.
1740 Relative to reg-reg move (2). */
1741 {4, 4, 4}, /* cost of storing integer registers */
1742 4, /* cost of reg,reg fld/fst */
1743 {12, 12, 12}, /* cost of loading fp registers
1744 in SFmode, DFmode and XFmode */
1745 {6, 6, 8}, /* cost of storing fp registers
1746 in SFmode, DFmode and XFmode */
1747 2, /* cost of moving MMX register */
1748 {8, 8}, /* cost of loading MMX registers
1749 in SImode and DImode */
1750 {8, 8}, /* cost of storing MMX registers
1751 in SImode and DImode */
1752 2, /* cost of moving SSE register */
1753 {8, 8, 8}, /* cost of loading SSE registers
1754 in SImode, DImode and TImode */
1755 {8, 8, 8}, /* cost of storing SSE registers
1756 in SImode, DImode and TImode */
1757 5, /* MMX or SSE register to integer */
1758 32, /* size of l1 cache. */
1759 256, /* size of l2 cache. */
1760 64, /* size of prefetch block */
1761 6, /* number of parallel prefetches */
1762 3, /* Branch cost */
1763 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1764 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1765 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1766 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1767 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1768 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1769 slm_memcpy,
1770 slm_memset,
1771 1, /* scalar_stmt_cost. */
1772 1, /* scalar_load_cost. */
1773 1, /* scalar_store_cost. */
1774 1, /* vec_stmt_cost. */
1775 4, /* vec_to_scalar_cost. */
1776 1, /* scalar_to_vec_cost. */
1777 1, /* vec_align_load_cost. */
1778 2, /* vec_unalign_load_cost. */
1779 1, /* vec_store_cost. */
1780 3, /* cond_taken_branch_cost. */
1781 1, /* cond_not_taken_branch_cost. */
1782 };
1784 static stringop_algs intel_memcpy[2] = {
1785 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1786 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1787 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1788 static stringop_algs intel_memset[2] = {
1789 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1790 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1791 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1792 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1793 static const
1794 struct processor_costs intel_cost = {
1795 COSTS_N_INSNS (1), /* cost of an add instruction */
1796 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1797 COSTS_N_INSNS (1), /* variable shift costs */
1798 COSTS_N_INSNS (1), /* constant shift costs */
1799 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1800 COSTS_N_INSNS (3), /* HI */
1801 COSTS_N_INSNS (3), /* SI */
1802 COSTS_N_INSNS (4), /* DI */
1803 COSTS_N_INSNS (2)}, /* other */
1804 0, /* cost of multiply per each bit set */
1805 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1806 COSTS_N_INSNS (26), /* HI */
1807 COSTS_N_INSNS (42), /* SI */
1808 COSTS_N_INSNS (74), /* DI */
1809 COSTS_N_INSNS (74)}, /* other */
1810 COSTS_N_INSNS (1), /* cost of movsx */
1811 COSTS_N_INSNS (1), /* cost of movzx */
1812 8, /* "large" insn */
1813 17, /* MOVE_RATIO */
1814 4, /* cost for loading QImode using movzbl */
1815 {4, 4, 4}, /* cost of loading integer registers
1816 in QImode, HImode and SImode.
1817 Relative to reg-reg move (2). */
1818 {4, 4, 4}, /* cost of storing integer registers */
1819 4, /* cost of reg,reg fld/fst */
1820 {12, 12, 12}, /* cost of loading fp registers
1821 in SFmode, DFmode and XFmode */
1822 {6, 6, 8}, /* cost of storing fp registers
1823 in SFmode, DFmode and XFmode */
1824 2, /* cost of moving MMX register */
1825 {8, 8}, /* cost of loading MMX registers
1826 in SImode and DImode */
1827 {8, 8}, /* cost of storing MMX registers
1828 in SImode and DImode */
1829 2, /* cost of moving SSE register */
1830 {8, 8, 8}, /* cost of loading SSE registers
1831 in SImode, DImode and TImode */
1832 {8, 8, 8}, /* cost of storing SSE registers
1833 in SImode, DImode and TImode */
1834 5, /* MMX or SSE register to integer */
1835 32, /* size of l1 cache. */
1836 256, /* size of l2 cache. */
1837 64, /* size of prefetch block */
1838 6, /* number of parallel prefetches */
1839 3, /* Branch cost */
1840 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1841 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1842 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1843 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1844 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1845 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1846 intel_memcpy,
1847 intel_memset,
1848 1, /* scalar_stmt_cost. */
1849 1, /* scalar_load_cost. */
1850 1, /* scalar_store_cost. */
1851 1, /* vec_stmt_cost. */
1852 4, /* vec_to_scalar_cost. */
1853 1, /* scalar_to_vec_cost. */
1854 1, /* vec_align_load_cost. */
1855 2, /* vec_unalign_load_cost. */
1856 1, /* vec_store_cost. */
1857 3, /* cond_taken_branch_cost. */
1858 1, /* cond_not_taken_branch_cost. */
1859 };
1861 /* Generic should produce code tuned for Core-i7 (and newer chips)
1862 and btver1 (and newer chips). */
1864 static stringop_algs generic_memcpy[2] = {
1865 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1866 {-1, libcall, false}}},
1867 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1868 {-1, libcall, false}}}};
1869 static stringop_algs generic_memset[2] = {
1870 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1871 {-1, libcall, false}}},
1872 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1873 {-1, libcall, false}}}};
1874 static const
1875 struct processor_costs generic_cost = {
1876 COSTS_N_INSNS (1), /* cost of an add instruction */
1877 /* On all chips taken into consideration, lea is 2 cycles or more. With
1878 this cost, however, our current implementation of synth_mult results in
1879 the use of unnecessary temporary registers, causing regressions on several
1880 SPECfp benchmarks. */
1881 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1882 COSTS_N_INSNS (1), /* variable shift costs */
1883 COSTS_N_INSNS (1), /* constant shift costs */
1884 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1885 COSTS_N_INSNS (4), /* HI */
1886 COSTS_N_INSNS (3), /* SI */
1887 COSTS_N_INSNS (4), /* DI */
1888 COSTS_N_INSNS (2)}, /* other */
1889 0, /* cost of multiply per each bit set */
1890 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1891 COSTS_N_INSNS (26), /* HI */
1892 COSTS_N_INSNS (42), /* SI */
1893 COSTS_N_INSNS (74), /* DI */
1894 COSTS_N_INSNS (74)}, /* other */
1895 COSTS_N_INSNS (1), /* cost of movsx */
1896 COSTS_N_INSNS (1), /* cost of movzx */
1897 8, /* "large" insn */
1898 17, /* MOVE_RATIO */
1899 4, /* cost for loading QImode using movzbl */
1900 {4, 4, 4}, /* cost of loading integer registers
1901 in QImode, HImode and SImode.
1902 Relative to reg-reg move (2). */
1903 {4, 4, 4}, /* cost of storing integer registers */
1904 4, /* cost of reg,reg fld/fst */
1905 {12, 12, 12}, /* cost of loading fp registers
1906 in SFmode, DFmode and XFmode */
1907 {6, 6, 8}, /* cost of storing fp registers
1908 in SFmode, DFmode and XFmode */
1909 2, /* cost of moving MMX register */
1910 {8, 8}, /* cost of loading MMX registers
1911 in SImode and DImode */
1912 {8, 8}, /* cost of storing MMX registers
1913 in SImode and DImode */
1914 2, /* cost of moving SSE register */
1915 {8, 8, 8}, /* cost of loading SSE registers
1916 in SImode, DImode and TImode */
1917 {8, 8, 8}, /* cost of storing SSE registers
1918 in SImode, DImode and TImode */
1919 5, /* MMX or SSE register to integer */
1920 32, /* size of l1 cache. */
1921 512, /* size of l2 cache. */
1922 64, /* size of prefetch block */
1923 6, /* number of parallel prefetches */
1924 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1925 value is increased to the perhaps more appropriate value of 5. */
1926 3, /* Branch cost */
1927 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1928 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1929 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1930 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1931 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1932 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1933 generic_memcpy,
1934 generic_memset,
1935 1, /* scalar_stmt_cost. */
1936 1, /* scalar_load_cost. */
1937 1, /* scalar_store_cost. */
1938 1, /* vec_stmt_cost. */
1939 1, /* vec_to_scalar_cost. */
1940 1, /* scalar_to_vec_cost. */
1941 1, /* vec_align_load_cost. */
1942 2, /* vec_unalign_load_cost. */
1943 1, /* vec_store_cost. */
1944 3, /* cond_taken_branch_cost. */
1945 1, /* cond_not_taken_branch_cost. */
1946 };
1948 /* core_cost should produce code tuned for the Core family of CPUs. */
1949 static stringop_algs core_memcpy[2] = {
1950 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1951 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1952 {-1, libcall, false}}}};
1953 static stringop_algs core_memset[2] = {
1954 {libcall, {{6, loop_1_byte, true},
1955 {24, loop, true},
1956 {8192, rep_prefix_4_byte, true},
1957 {-1, libcall, false}}},
1958 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1959 {-1, libcall, false}}}};
1961 static const
1962 struct processor_costs core_cost = {
1963 COSTS_N_INSNS (1), /* cost of an add instruction */
1964 /* On all chips taken into consideration, lea is 2 cycles or more. With
1965 this cost, however, our current implementation of synth_mult results in
1966 the use of unnecessary temporary registers, causing regressions on several
1967 SPECfp benchmarks. */
1968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1969 COSTS_N_INSNS (1), /* variable shift costs */
1970 COSTS_N_INSNS (1), /* constant shift costs */
1971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1972 COSTS_N_INSNS (4), /* HI */
1973 COSTS_N_INSNS (3), /* SI */
1974 COSTS_N_INSNS (4), /* DI */
1975 COSTS_N_INSNS (2)}, /* other */
1976 0, /* cost of multiply per each bit set */
1977 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1978 COSTS_N_INSNS (26), /* HI */
1979 COSTS_N_INSNS (42), /* SI */
1980 COSTS_N_INSNS (74), /* DI */
1981 COSTS_N_INSNS (74)}, /* other */
1982 COSTS_N_INSNS (1), /* cost of movsx */
1983 COSTS_N_INSNS (1), /* cost of movzx */
1984 8, /* "large" insn */
1985 17, /* MOVE_RATIO */
1986 4, /* cost for loading QImode using movzbl */
1987 {4, 4, 4}, /* cost of loading integer registers
1988 in QImode, HImode and SImode.
1989 Relative to reg-reg move (2). */
1990 {4, 4, 4}, /* cost of storing integer registers */
1991 4, /* cost of reg,reg fld/fst */
1992 {12, 12, 12}, /* cost of loading fp registers
1993 in SFmode, DFmode and XFmode */
1994 {6, 6, 8}, /* cost of storing fp registers
1995 in SFmode, DFmode and XFmode */
1996 2, /* cost of moving MMX register */
1997 {8, 8}, /* cost of loading MMX registers
1998 in SImode and DImode */
1999 {8, 8}, /* cost of storing MMX registers
2000 in SImode and DImode */
2001 2, /* cost of moving SSE register */
2002 {8, 8, 8}, /* cost of loading SSE registers
2003 in SImode, DImode and TImode */
2004 {8, 8, 8}, /* cost of storing SSE registers
2005 in SImode, DImode and TImode */
2006 5, /* MMX or SSE register to integer */
2007 64, /* size of l1 cache. */
2008 512, /* size of l2 cache. */
2009 64, /* size of prefetch block */
2010 6, /* number of parallel prefetches */
2011 /* FIXME: perhaps a more appropriate value is 5. */
2012 3, /* Branch cost */
2013 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2014 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2015 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2016 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2017 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2018 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2019 core_memcpy,
2020 core_memset,
2021 1, /* scalar_stmt_cost. */
2022 1, /* scalar_load_cost. */
2023 1, /* scalar_store_cost. */
2024 1, /* vec_stmt_cost. */
2025 1, /* vec_to_scalar_cost. */
2026 1, /* scalar_to_vec_cost. */
2027 1, /* vec_align_load_cost. */
2028 2, /* vec_unalign_load_cost. */
2029 1, /* vec_store_cost. */
2030 3, /* cond_taken_branch_cost. */
2031 1, /* cond_not_taken_branch_cost. */
2032 };
2035 /* Set by -mtune. */
2036 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2038 /* Set by -mtune or -Os. */
2039 const struct processor_costs *ix86_cost = &pentium_cost;
2041 /* Processor feature/optimization bitmasks. */
2042 #define m_386 (1<<PROCESSOR_I386)
2043 #define m_486 (1<<PROCESSOR_I486)
2044 #define m_PENT (1<<PROCESSOR_PENTIUM)
2045 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2046 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2047 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2048 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2049 #define m_CORE2 (1<<PROCESSOR_CORE2)
2050 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2051 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2052 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2053 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2054 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2055 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2056 #define m_KNL (1<<PROCESSOR_KNL)
2057 #define m_INTEL (1<<PROCESSOR_INTEL)
2059 #define m_GEODE (1<<PROCESSOR_GEODE)
2060 #define m_K6 (1<<PROCESSOR_K6)
2061 #define m_K6_GEODE (m_K6 | m_GEODE)
2062 #define m_K8 (1<<PROCESSOR_K8)
2063 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2064 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2065 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2066 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2067 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2068 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2069 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2070 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2071 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2072 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2073 #define m_BTVER (m_BTVER1 | m_BTVER2)
2074 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2076 #define m_GENERIC (1<<PROCESSOR_GENERIC)
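/* These single-bit masks are OR'ed together to build the per-feature
   selectors in x86-tune.def; for example, a selector of
   (m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC) enables the corresponding
   tuning for every Core variant, every AMD family listed above and the
   generic tuning.  set_ix86_tune_features below tests each selector
   against the 1u << ix86_tune bit of the active processor; see the
   illustrative DEF_TUNE entry after the next two tables.  */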
2078 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2079 #undef DEF_TUNE
2080 #define DEF_TUNE(tune, name, selector) name,
2081 #include "x86-tune.def"
2082 #undef DEF_TUNE
2083 };
2085 /* Feature tests against the various tunings. */
2086 unsigned char ix86_tune_features[X86_TUNE_LAST];
2088 /* Feature tests against the various tunings used to create ix86_tune_features
2089 based on the processor mask. */
2090 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2091 #undef DEF_TUNE
2092 #define DEF_TUNE(tune, name, selector) selector,
2093 #include "x86-tune.def"
2094 #undef DEF_TUNE
2095 };
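/* Both tables above are generated from the same x86-tune.def entries:
   each DEF_TUNE (tune, name, selector) contributes its NAME string to
   ix86_tune_feature_names and its SELECTOR mask to
   initial_ix86_tune_features.  An entry looks roughly like the following
   (illustrative; see x86-tune.def for the exact selectors):

     DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
               m_PENT | m_PPRO | m_CORE_ALL | m_BONNELL | m_AMD_MULTIPLE
               | m_GENERIC)  */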
2097 /* Feature tests against the various architecture variations. */
2098 unsigned char ix86_arch_features[X86_ARCH_LAST];
2100 /* Feature tests against the various architecture variations, used to create
2101 ix86_arch_features based on the processor mask. */
2102 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2103 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2104 ~(m_386 | m_486 | m_PENT | m_K6),
2106 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2107 ~m_386,
2109 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2110 ~(m_386 | m_486),
2112 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2113 ~m_386,
2115 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2116 ~m_386,
2117 };
2119 /* If the average insn count for a single function invocation is
2120 lower than this constant, emit fast (but longer) prologue and
2121 epilogue code. */
2122 #define FAST_PROLOGUE_INSN_COUNT 20
2124 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2125 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2126 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2127 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2129 /* Array of the smallest class containing reg number REGNO, indexed by
2130 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2132 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2134 /* ax, dx, cx, bx */
2135 AREG, DREG, CREG, BREG,
2136 /* si, di, bp, sp */
2137 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2138 /* FP registers */
2139 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2140 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2141 /* arg pointer */
2142 NON_Q_REGS,
2143 /* flags, fpsr, fpcr, frame */
2144 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2145 /* SSE registers */
2146 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2147 SSE_REGS, SSE_REGS,
2148 /* MMX registers */
2149 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2150 MMX_REGS, MMX_REGS,
2151 /* REX registers */
2152 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2153 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2154 /* SSE REX registers */
2155 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2156 SSE_REGS, SSE_REGS,
2157 /* AVX-512 SSE registers */
2158 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2159 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2160 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2161 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2162 /* Mask registers. */
2163 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2164 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2165 /* MPX bound registers */
2166 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2169 /* The "default" register map used in 32bit mode. */
2171 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2173 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2174 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2175 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2176 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2177 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2178 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2179 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2180 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2181 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2182 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2183 101, 102, 103, 104, /* bound registers */
2186 /* The "default" register map used in 64bit mode. */
2188 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2190 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2191 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2192 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2193 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2194 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2195 8,9,10,11,12,13,14,15, /* extended integer registers */
2196 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2197 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2198 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2199 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2200 126, 127, 128, 129, /* bound registers */
2203 /* Define the register numbers to be used in Dwarf debugging information.
2204 The SVR4 reference port C compiler uses the following register numbers
2205 in its Dwarf output code:
2206 0 for %eax (gcc regno = 0)
2207 1 for %ecx (gcc regno = 2)
2208 2 for %edx (gcc regno = 1)
2209 3 for %ebx (gcc regno = 3)
2210 4 for %esp (gcc regno = 7)
2211 5 for %ebp (gcc regno = 6)
2212 6 for %esi (gcc regno = 4)
2213 7 for %edi (gcc regno = 5)
2214 The following three DWARF register numbers are never generated by
2215 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2216 believes these numbers have these meanings.
2217 8 for %eip (no gcc equivalent)
2218 9 for %eflags (gcc regno = 17)
2219 10 for %trapno (no gcc equivalent)
2220 It is not at all clear how we should number the FP stack registers
2221 for the x86 architecture. If the version of SDB on x86/svr4 were
2222 a bit less brain dead with respect to floating-point then we would
2223 have a precedent to follow with respect to DWARF register numbers
2224 for x86 FP registers, but the SDB on x86/svr4 is so completely
2225 broken with respect to FP registers that it is hardly worth thinking
2226 of it as something to strive for compatibility with.
2227 The version of x86/svr4 SDB I have at the moment does (partially)
2228 seem to believe that DWARF register number 11 is associated with
2229 the x86 register %st(0), but that's about all. Higher DWARF
2230 register numbers don't seem to be associated with anything in
2231 particular, and even for DWARF regno 11, SDB only seems to under-
2232 stand that it should say that a variable lives in %st(0) (when
2233 asked via an `=' command) if we said it was in DWARF regno 11,
2234 but SDB still prints garbage when asked for the value of the
2235 variable in question (via a `/' command).
2236 (Also note that the labels SDB prints for various FP stack regs
2237 when doing an `x' command are all wrong.)
2238 Note that these problems generally don't affect the native SVR4
2239 C compiler because it doesn't allow the use of -O with -g and
2240 because when it is *not* optimizing, it allocates a memory
2241 location for each floating-point variable, and the memory
2242 location is what gets described in the DWARF AT_location
2243 attribute for the variable in question.
2244 Regardless of the severe mental illness of the x86/svr4 SDB, we
2245 do something sensible here and we use the following DWARF
2246 register numbers. Note that these are all stack-top-relative
2247 numbers.
2248 11 for %st(0) (gcc regno = 8)
2249 12 for %st(1) (gcc regno = 9)
2250 13 for %st(2) (gcc regno = 10)
2251 14 for %st(3) (gcc regno = 11)
2252 15 for %st(4) (gcc regno = 12)
2253 16 for %st(5) (gcc regno = 13)
2254 17 for %st(6) (gcc regno = 14)
2255 18 for %st(7) (gcc regno = 15)
2256 */
2257 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2259 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2260 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2261 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2262 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2263 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2264 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2265 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2267 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2268 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2269 101, 102, 103, 104, /* bound registers */
2272 /* Define parameter passing and return registers. */
2274 static int const x86_64_int_parameter_registers[6] =
2276 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2279 static int const x86_64_ms_abi_int_parameter_registers[4] =
2281 CX_REG, DX_REG, R8_REG, R9_REG
2284 static int const x86_64_int_return_registers[4] =
2286 AX_REG, DX_REG, DI_REG, SI_REG
2289 /* Additional registers that are clobbered by SYSV calls. */
2291 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2293 SI_REG, DI_REG,
2294 XMM6_REG, XMM7_REG,
2295 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2296 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2297 };
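/* These are precisely the registers that the Microsoft x64 calling
   convention treats as callee-saved but the SysV ABI treats as
   call-clobbered (rsi, rdi and xmm6-xmm15), so an ms_abi caller has to
   assume a sysv_abi callee may clobber them.  */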
2299 /* Define the structure for the machine field in struct function. */
2301 struct GTY(()) stack_local_entry {
2302 unsigned short mode;
2303 unsigned short n;
2304 rtx rtl;
2305 struct stack_local_entry *next;
2308 /* Structure describing stack frame layout.
2309 Stack grows downward:
2311 [arguments]
2312 <- ARG_POINTER
2313 saved pc
2315 saved static chain if ix86_static_chain_on_stack
2317 saved frame pointer if frame_pointer_needed
2318 <- HARD_FRAME_POINTER
2319 [saved regs]
2320 <- regs_save_offset
2321 [padding0]
2323 [saved SSE regs]
2324 <- sse_regs_save_offset
2325 [padding1] |
2326 | <- FRAME_POINTER
2327 [va_arg registers] |
2329 [frame] |
2331 [padding2] | = to_allocate
2332 <- STACK_POINTER
2333 */
2334 struct ix86_frame
2336 int nsseregs;
2337 int nregs;
2338 int va_arg_size;
2339 int red_zone_size;
2340 int outgoing_arguments_size;
2342 /* The offsets relative to ARG_POINTER. */
2343 HOST_WIDE_INT frame_pointer_offset;
2344 HOST_WIDE_INT hard_frame_pointer_offset;
2345 HOST_WIDE_INT stack_pointer_offset;
2346 HOST_WIDE_INT hfp_save_offset;
2347 HOST_WIDE_INT reg_save_offset;
2348 HOST_WIDE_INT sse_reg_save_offset;
2350 /* When save_regs_using_mov is set, emit prologue using
2351 move instead of push instructions. */
2352 bool save_regs_using_mov;
2355 /* Which cpu are we scheduling for. */
2356 enum attr_cpu ix86_schedule;
2358 /* Which cpu are we optimizing for. */
2359 enum processor_type ix86_tune;
2361 /* Which instruction set architecture to use. */
2362 enum processor_type ix86_arch;
2364 /* True if processor has SSE prefetch instruction. */
2365 unsigned char x86_prefetch_sse;
2367 /* -mstackrealign option */
2368 static const char ix86_force_align_arg_pointer_string[]
2369 = "force_align_arg_pointer";
2371 static rtx (*ix86_gen_leave) (void);
2372 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2373 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2374 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2375 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2376 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2377 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2378 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2379 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2380 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2381 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2382 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2384 /* Preferred alignment for stack boundary in bits. */
2385 unsigned int ix86_preferred_stack_boundary;
2387 /* Alignment for incoming stack boundary in bits, as specified on the
2388 command line. */
2389 static unsigned int ix86_user_incoming_stack_boundary;
2391 /* Default alignment for incoming stack boundary in bits. */
2392 static unsigned int ix86_default_incoming_stack_boundary;
2394 /* Alignment for incoming stack boundary in bits. */
2395 unsigned int ix86_incoming_stack_boundary;
2397 /* Calling abi specific va_list type nodes. */
2398 static GTY(()) tree sysv_va_list_type_node;
2399 static GTY(()) tree ms_va_list_type_node;
2401 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2402 char internal_label_prefix[16];
2403 int internal_label_prefix_len;
2405 /* Fence to use after loop using movnt. */
2406 tree x86_mfence;
2408 /* Register class used for passing a given 64-bit part of an argument.
2409 These represent classes as documented by the psABI, with the exception
2410 of the SSESF and SSEDF classes, which are basically the SSE class; GCC
2411 just uses an SFmode or DFmode move instead of a DImode move to avoid
2413 reformatting penalties. Similarly we play games with INTEGERSI_CLASS to
2414 use cheaper SImode moves whenever possible (the upper half is then just padding). */
2415 enum x86_64_reg_class
2417 X86_64_NO_CLASS,
2418 X86_64_INTEGER_CLASS,
2419 X86_64_INTEGERSI_CLASS,
2420 X86_64_SSE_CLASS,
2421 X86_64_SSESF_CLASS,
2422 X86_64_SSEDF_CLASS,
2423 X86_64_SSEUP_CLASS,
2424 X86_64_X87_CLASS,
2425 X86_64_X87UP_CLASS,
2426 X86_64_COMPLEX_X87_CLASS,
2427 X86_64_MEMORY_CLASS
2430 #define MAX_CLASSES 8
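/* Illustrative psABI examples of how these classes are used: a double
   argument classifies as X86_64_SSEDF_CLASS and travels in an XMM
   register via a DFmode move, a long classifies as X86_64_INTEGER_CLASS
   and travels in a general register, and a struct { double d; long l; }
   spans two eightbytes classified SSEDF + INTEGER, so it is split between
   an XMM register and a general register.  Anything that ends up in
   X86_64_MEMORY_CLASS is passed on the stack instead.  */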
2432 /* Table of constants used by fldpi, fldln2, etc.... */
2433 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2434 static bool ext_80387_constants_init = 0;
2437 static struct machine_function * ix86_init_machine_status (void);
2438 static rtx ix86_function_value (const_tree, const_tree, bool);
2439 static bool ix86_function_value_regno_p (const unsigned int);
2440 static unsigned int ix86_function_arg_boundary (machine_mode,
2441 const_tree);
2442 static rtx ix86_static_chain (const_tree, bool);
2443 static int ix86_function_regparm (const_tree, const_tree);
2444 static void ix86_compute_frame_layout (struct ix86_frame *);
2445 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2446 rtx, rtx, int);
2447 static void ix86_add_new_builtins (HOST_WIDE_INT);
2448 static tree ix86_canonical_va_list_type (tree);
2449 static void predict_jump (int);
2450 static unsigned int split_stack_prologue_scratch_regno (void);
2451 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2453 enum ix86_function_specific_strings
2455 IX86_FUNCTION_SPECIFIC_ARCH,
2456 IX86_FUNCTION_SPECIFIC_TUNE,
2457 IX86_FUNCTION_SPECIFIC_MAX
2460 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2461 const char *, enum fpmath_unit, bool);
2462 static void ix86_function_specific_save (struct cl_target_option *,
2463 struct gcc_options *opts);
2464 static void ix86_function_specific_restore (struct gcc_options *opts,
2465 struct cl_target_option *);
2466 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2467 static void ix86_function_specific_print (FILE *, int,
2468 struct cl_target_option *);
2469 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2470 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2471 struct gcc_options *,
2472 struct gcc_options *,
2473 struct gcc_options *);
2474 static bool ix86_can_inline_p (tree, tree);
2475 static void ix86_set_current_function (tree);
2476 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2478 static enum calling_abi ix86_function_abi (const_tree);
2481 #ifndef SUBTARGET32_DEFAULT_CPU
2482 #define SUBTARGET32_DEFAULT_CPU "i386"
2483 #endif
2485 /* Whether -mtune= or -march= were specified */
2486 static int ix86_tune_defaulted;
2487 static int ix86_arch_specified;
2489 /* Vectorization library interface and handlers. */
2490 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2492 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2493 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2495 /* Processor target table, indexed by processor number */
2496 struct ptt
2498 const char *const name; /* processor name */
2499 const struct processor_costs *cost; /* Processor costs */
2500 const int align_loop; /* Default alignments. */
2501 const int align_loop_max_skip;
2502 const int align_jump;
2503 const int align_jump_max_skip;
2504 const int align_func;
2507 /* This table must be in sync with enum processor_type in i386.h. */
2508 static const struct ptt processor_target_table[PROCESSOR_max] =
2510 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2511 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2512 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2513 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2514 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2515 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2516 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2517 {"core2", &core_cost, 16, 10, 16, 10, 16},
2518 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2519 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2520 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2521 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2522 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2523 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2524 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2525 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2526 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2527 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2528 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2529 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2530 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2531 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2532 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2533 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2534 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2535 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2536 };
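/* Reading the table: the row selected by -mtune (or implied by -march
   when -mtune is not given) supplies the cost table used for rtx costs
   and the default alignments applied by ix86_default_align below.  For
   example, the btver2 row selects btver2_cost with 16-byte loop and jump
   alignment (skipping at most 10 and 7 bytes respectively) and 11-byte
   function alignment, while the pentium4 and nocona rows deliberately
   request no extra alignment.  */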
2538 static unsigned int
2539 rest_of_handle_insert_vzeroupper (void)
2541 int i;
2543 /* vzeroupper instructions are inserted immediately after reload to
2544 account for possible spills from 256-bit registers. The pass
2545 reuses the mode switching infrastructure by re-running the mode
2546 insertion pass, so disable entities that have already been processed. */
2547 for (i = 0; i < MAX_386_ENTITIES; i++)
2548 ix86_optimize_mode_switching[i] = 0;
2550 ix86_optimize_mode_switching[AVX_U128] = 1;
2552 /* Call optimize_mode_switching. */
2553 g->get_passes ()->execute_pass_mode_switching ();
2554 return 0;
2557 namespace {
2559 const pass_data pass_data_insert_vzeroupper =
2561 RTL_PASS, /* type */
2562 "vzeroupper", /* name */
2563 OPTGROUP_NONE, /* optinfo_flags */
2564 TV_NONE, /* tv_id */
2565 0, /* properties_required */
2566 0, /* properties_provided */
2567 0, /* properties_destroyed */
2568 0, /* todo_flags_start */
2569 TODO_df_finish, /* todo_flags_finish */
2572 class pass_insert_vzeroupper : public rtl_opt_pass
2574 public:
2575 pass_insert_vzeroupper(gcc::context *ctxt)
2576 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2579 /* opt_pass methods: */
2580 virtual bool gate (function *)
2582 return TARGET_AVX && !TARGET_AVX512F
2583 && TARGET_VZEROUPPER && flag_expensive_optimizations
2584 && !optimize_size;
2587 virtual unsigned int execute (function *)
2589 return rest_of_handle_insert_vzeroupper ();
2592 }; // class pass_insert_vzeroupper
2594 } // anon namespace
2596 rtl_opt_pass *
2597 make_pass_insert_vzeroupper (gcc::context *ctxt)
2599 return new pass_insert_vzeroupper (ctxt);
2602 /* Return true if a red-zone is in use. */
2604 static inline bool
2605 ix86_using_red_zone (void)
2607 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2610 /* Return a string that documents the current -m options. The caller is
2611 responsible for freeing the string. */
2613 static char *
2614 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2615 const char *tune, enum fpmath_unit fpmath,
2616 bool add_nl_p)
2618 struct ix86_target_opts
2620 const char *option; /* option string */
2621 HOST_WIDE_INT mask; /* isa mask options */
2624 /* This table is ordered so that options like -msse4.2, which imply
2625 the preceding options, are matched first. */
2626 static struct ix86_target_opts isa_opts[] =
2628 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2629 { "-mfma", OPTION_MASK_ISA_FMA },
2630 { "-mxop", OPTION_MASK_ISA_XOP },
2631 { "-mlwp", OPTION_MASK_ISA_LWP },
2632 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2633 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2634 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2635 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2636 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2637 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2638 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2639 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2640 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2641 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2642 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2643 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2644 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2645 { "-msse3", OPTION_MASK_ISA_SSE3 },
2646 { "-msse2", OPTION_MASK_ISA_SSE2 },
2647 { "-msse", OPTION_MASK_ISA_SSE },
2648 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2649 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2650 { "-mmmx", OPTION_MASK_ISA_MMX },
2651 { "-mabm", OPTION_MASK_ISA_ABM },
2652 { "-mbmi", OPTION_MASK_ISA_BMI },
2653 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2654 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2655 { "-mhle", OPTION_MASK_ISA_HLE },
2656 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2657 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2658 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2659 { "-madx", OPTION_MASK_ISA_ADX },
2660 { "-mtbm", OPTION_MASK_ISA_TBM },
2661 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2662 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2663 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2664 { "-maes", OPTION_MASK_ISA_AES },
2665 { "-msha", OPTION_MASK_ISA_SHA },
2666 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2667 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2668 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2669 { "-mf16c", OPTION_MASK_ISA_F16C },
2670 { "-mrtm", OPTION_MASK_ISA_RTM },
2671 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2672 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2673 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2674 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2675 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2676 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2677 { "-mmpx", OPTION_MASK_ISA_MPX },
2678 { "-mclwb", OPTION_MASK_ISA_CLWB },
2679 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
2682 /* Flag options. */
2683 static struct ix86_target_opts flag_opts[] =
2685 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2686 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2687 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2688 { "-m80387", MASK_80387 },
2689 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2690 { "-malign-double", MASK_ALIGN_DOUBLE },
2691 { "-mcld", MASK_CLD },
2692 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2693 { "-mieee-fp", MASK_IEEE_FP },
2694 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2695 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2696 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2697 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2698 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2699 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2700 { "-mno-red-zone", MASK_NO_RED_ZONE },
2701 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2702 { "-mrecip", MASK_RECIP },
2703 { "-mrtd", MASK_RTD },
2704 { "-msseregparm", MASK_SSEREGPARM },
2705 { "-mstack-arg-probe", MASK_STACK_PROBE },
2706 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2707 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2708 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2709 { "-mvzeroupper", MASK_VZEROUPPER },
2710 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2711 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2712 { "-mprefer-avx128", MASK_PREFER_AVX128},
2715 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2717 char isa_other[40];
2718 char target_other[40];
2719 unsigned num = 0;
2720 unsigned i, j;
2721 char *ret;
2722 char *ptr;
2723 size_t len;
2724 size_t line_len;
2725 size_t sep_len;
2726 const char *abi;
2728 memset (opts, '\0', sizeof (opts));
2730 /* Add -march= option. */
2731 if (arch)
2733 opts[num][0] = "-march=";
2734 opts[num++][1] = arch;
2737 /* Add -mtune= option. */
2738 if (tune)
2740 opts[num][0] = "-mtune=";
2741 opts[num++][1] = tune;
2744 /* Add -m32/-m64/-mx32. */
2745 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2747 if ((isa & OPTION_MASK_ABI_64) != 0)
2748 abi = "-m64";
2749 else
2750 abi = "-mx32";
2751 isa &= ~ (OPTION_MASK_ISA_64BIT
2752 | OPTION_MASK_ABI_64
2753 | OPTION_MASK_ABI_X32);
2755 else
2756 abi = "-m32";
2757 opts[num++][0] = abi;
2759 /* Pick out the options in isa options. */
2760 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2762 if ((isa & isa_opts[i].mask) != 0)
2764 opts[num++][0] = isa_opts[i].option;
2765 isa &= ~ isa_opts[i].mask;
2769 if (isa && add_nl_p)
2771 opts[num++][0] = isa_other;
2772 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2773 isa);
2776 /* Add flag options. */
2777 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2779 if ((flags & flag_opts[i].mask) != 0)
2781 opts[num++][0] = flag_opts[i].option;
2782 flags &= ~ flag_opts[i].mask;
2786 if (flags && add_nl_p)
2788 opts[num++][0] = target_other;
2789 sprintf (target_other, "(other flags: %#x)", flags);
2792 /* Add -fpmath= option. */
2793 if (fpmath)
2795 opts[num][0] = "-mfpmath=";
2796 switch ((int) fpmath)
2798 case FPMATH_387:
2799 opts[num++][1] = "387";
2800 break;
2802 case FPMATH_SSE:
2803 opts[num++][1] = "sse";
2804 break;
2806 case FPMATH_387 | FPMATH_SSE:
2807 opts[num++][1] = "sse+387";
2808 break;
2810 default:
2811 gcc_unreachable ();
2815 /* Any options? */
2816 if (num == 0)
2817 return NULL;
2819 gcc_assert (num < ARRAY_SIZE (opts));
2821 /* Size the string. */
2822 len = 0;
2823 sep_len = (add_nl_p) ? 3 : 1;
2824 for (i = 0; i < num; i++)
2826 len += sep_len;
2827 for (j = 0; j < 2; j++)
2828 if (opts[i][j])
2829 len += strlen (opts[i][j]);
2832 /* Build the string. */
2833 ret = ptr = (char *) xmalloc (len);
2834 line_len = 0;
2836 for (i = 0; i < num; i++)
2838 size_t len2[2];
2840 for (j = 0; j < 2; j++)
2841 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2843 if (i != 0)
2845 *ptr++ = ' ';
2846 line_len++;
2848 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2850 *ptr++ = '\\';
2851 *ptr++ = '\n';
2852 line_len = 0;
2856 for (j = 0; j < 2; j++)
2857 if (opts[i][j])
2859 memcpy (ptr, opts[i][j], len2[j]);
2860 ptr += len2[j];
2861 line_len += len2[j];
2865 *ptr = '\0';
2866 gcc_assert (ret + len >= ptr);
2868 return ret;
2871 /* Return true if profiling code should be emitted before the
2872 prologue; otherwise return false.
2873 Note: for x86 the "hotfix" (hot-patching) case is rejected with sorry (). */
2874 static bool
2875 ix86_profile_before_prologue (void)
2877 return flag_fentry != 0;
2880 /* Function that is callable from the debugger to print the current
2881 options. */
2882 void ATTRIBUTE_UNUSED
2883 ix86_debug_options (void)
2885 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2886 ix86_arch_string, ix86_tune_string,
2887 ix86_fpmath, true);
2889 if (opts)
2891 fprintf (stderr, "%s\n\n", opts);
2892 free (opts);
2894 else
2895 fputs ("<no options>\n\n", stderr);
2897 return;
2900 static const char *stringop_alg_names[] = {
2901 #define DEF_ENUM
2902 #define DEF_ALG(alg, name) #name,
2903 #include "stringop.def"
2904 #undef DEF_ENUM
2905 #undef DEF_ALG
2908 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2909 The string is of the following form (or a comma-separated list of such entries):
2911 strategy_alg:max_size:[align|noalign]
2913 where the full size range for the strategy is either [0, max_size] or
2914 [min_size, max_size], where min_size is one more than the max_size of the
2915 preceding range. The last size range must have max_size == -1.
2917 Examples:
2920 -mmemcpy-strategy=libcall:-1:noalign
2922 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2926 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2928 This is to tell the compiler to use the following strategy for memset
2929 1) when the expected size is between [1, 16], use rep_8byte strategy;
2930 2) when the size is between [17, 2048], use vector_loop;
2931 3) when the size is > 2048, use libcall. */
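/* For instance (illustrative),

   -mmemcpy-strategy=unrolled_loop:256:noalign,libcall:-1:noalign

   parses into two ranges: known sizes of 1 to 256 bytes use an unrolled
   loop and anything larger falls back to a library call, neither forcing
   extra destination alignment; ix86_parse_stringop_strategy_string below
   then writes these ranges over the default memcpy algs of the current
   cost table.  */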
2933 struct stringop_size_range
2935 int max;
2936 stringop_alg alg;
2937 bool noalign;
2940 static void
2941 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2943 const struct stringop_algs *default_algs;
2944 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2945 char *curr_range_str, *next_range_str;
2946 int i = 0, n = 0;
2948 if (is_memset)
2949 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2950 else
2951 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2953 curr_range_str = strategy_str;
2957 int maxs;
2958 char alg_name[128];
2959 char align[16];
2960 next_range_str = strchr (curr_range_str, ',');
2961 if (next_range_str)
2962 *next_range_str++ = '\0';
2964 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2965 alg_name, &maxs, align))
2967 error ("wrong arg %s to option %s", curr_range_str,
2968 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2969 return;
2972 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2974 error ("size ranges of option %s should be increasing",
2975 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2976 return;
2979 for (i = 0; i < last_alg; i++)
2980 if (!strcmp (alg_name, stringop_alg_names[i]))
2981 break;
2983 if (i == last_alg)
2985 error ("wrong stringop strategy name %s specified for option %s",
2986 alg_name,
2987 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2988 return;
2991 if ((stringop_alg) i == rep_prefix_8_byte
2992 && !TARGET_64BIT)
2994 /* rep; movq isn't available in 32-bit code. */
2995 error ("stringop strategy name %s specified for option %s "
2996 "not supported for 32-bit code",
2997 alg_name,
2998 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2999 return;
3002 input_ranges[n].max = maxs;
3003 input_ranges[n].alg = (stringop_alg) i;
3004 if (!strcmp (align, "align"))
3005 input_ranges[n].noalign = false;
3006 else if (!strcmp (align, "noalign"))
3007 input_ranges[n].noalign = true;
3008 else
3010 error ("unknown alignment %s specified for option %s",
3011 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3012 return;
3014 n++;
3015 curr_range_str = next_range_str;
3017 while (curr_range_str);
3019 if (input_ranges[n - 1].max != -1)
3021 error ("the max value for the last size range should be -1"
3022 " for option %s",
3023 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
3024 return;
3027 if (n > MAX_STRINGOP_ALGS)
3029 error ("too many size ranges specified in option %s",
3030 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
3031 return;
3034 /* Now override the default algs array. */
3035 for (i = 0; i < n; i++)
3037 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3038 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3039 = input_ranges[i].alg;
3040 *const_cast<int *>(&default_algs->size[i].noalign)
3041 = input_ranges[i].noalign;
3046 /* parse -mtune-ctrl= option. When DUMP is true,
3047 print the features that are explicitly set. */
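/* Hypothetical usage sketch (the feature names here are for illustration only;
   the real ones come from ix86_tune_feature_names[]): -mtune-ctrl=feature_a,^feature_b
   would set ix86_tune_features for feature_a and, because of the '^' prefix,
   clear it for feature_b.  */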
3049 static void
3050 parse_mtune_ctrl_str (bool dump)
3052 if (!ix86_tune_ctrl_string)
3053 return;
3055 char *next_feature_string = NULL;
3056 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3057 char *orig = curr_feature_string;
3058 int i;
3061 bool clear = false;
3063 next_feature_string = strchr (curr_feature_string, ',');
3064 if (next_feature_string)
3065 *next_feature_string++ = '\0';
3066 if (*curr_feature_string == '^')
3068 curr_feature_string++;
3069 clear = true;
3071 for (i = 0; i < X86_TUNE_LAST; i++)
3073 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3075 ix86_tune_features[i] = !clear;
3076 if (dump)
3077 fprintf (stderr, "Explicitly %s feature %s\n",
3078 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3079 break;
3082 if (i == X86_TUNE_LAST)
3083 error ("Unknown parameter to option -mtune-ctrl: %s",
3084 clear ? curr_feature_string - 1 : curr_feature_string);
3085 curr_feature_string = next_feature_string;
3087 while (curr_feature_string);
3088 free (orig);
3091 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3092 processor type. */
3094 static void
3095 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3097 unsigned int ix86_tune_mask = 1u << ix86_tune;
3098 int i;
3100 for (i = 0; i < X86_TUNE_LAST; ++i)
3102 if (ix86_tune_no_default)
3103 ix86_tune_features[i] = 0;
3104 else
3105 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3108 if (dump)
3110 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3111 for (i = 0; i < X86_TUNE_LAST; i++)
3112 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3113 ix86_tune_features[i] ? "on" : "off");
3116 parse_mtune_ctrl_str (dump);
3120 /* Default align_* from the processor table. */
3122 static void
3123 ix86_default_align (struct gcc_options *opts)
3125 if (opts->x_align_loops == 0)
3127 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3128 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3130 if (opts->x_align_jumps == 0)
3132 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3133 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3135 if (opts->x_align_functions == 0)
3137 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3141 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
3143 static void
3144 ix86_override_options_after_change (void)
3146 ix86_default_align (&global_options);
3149 /* Override various settings based on options. If MAIN_ARGS_P, the
3150 options are from the command line, otherwise they are from
3151 attributes. */
3153 static void
3154 ix86_option_override_internal (bool main_args_p,
3155 struct gcc_options *opts,
3156 struct gcc_options *opts_set)
3158 int i;
3159 unsigned int ix86_arch_mask;
3160 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3161 const char *prefix;
3162 const char *suffix;
3163 const char *sw;
3165 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3166 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3167 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3168 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3169 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3170 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3171 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3172 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3173 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3174 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3175 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3176 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3177 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3178 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3179 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3180 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3181 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3182 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3183 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3184 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3185 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3186 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3187 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3188 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3189 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3190 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3191 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3192 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3193 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3194 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3195 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3196 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3197 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3198 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3199 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3200 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3201 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3202 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3203 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3204 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3205 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3206 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3207 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3208 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3209 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3210 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3211 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3212 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3213 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3214 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3215 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3216 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3217 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3218 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3219 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3220 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3221 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3223 #define PTA_CORE2 \
3224 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3225 | PTA_CX16 | PTA_FXSR)
3226 #define PTA_NEHALEM \
3227 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3228 #define PTA_WESTMERE \
3229 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3230 #define PTA_SANDYBRIDGE \
3231 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3232 #define PTA_IVYBRIDGE \
3233 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3234 #define PTA_HASWELL \
3235 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3236 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3237 #define PTA_BROADWELL \
3238 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3239 #define PTA_KNL \
3240 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3241 #define PTA_BONNELL \
3242 (PTA_CORE2 | PTA_MOVBE)
3243 #define PTA_SILVERMONT \
3244 (PTA_WESTMERE | PTA_MOVBE)
3246 /* If this reaches 64, the flags field of struct pta below needs to be widened.  */
3248 static struct pta
3250 const char *const name; /* processor name or nickname. */
3251 const enum processor_type processor;
3252 const enum attr_cpu schedule;
3253 const unsigned HOST_WIDE_INT flags;
3255 const processor_alias_table[] =
3257 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3258 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3259 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3260 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3261 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3262 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3263 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3264 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3265 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3266 PTA_MMX | PTA_SSE | PTA_FXSR},
3267 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3268 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3269 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3270 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3271 PTA_MMX | PTA_SSE | PTA_FXSR},
3272 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3273 PTA_MMX | PTA_SSE | PTA_FXSR},
3274 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3275 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3276 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3277 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3278 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3279 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3280 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3281 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3282 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3283 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3284 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3285 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3286 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3287 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3288 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3289 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3290 PTA_SANDYBRIDGE},
3291 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3292 PTA_SANDYBRIDGE},
3293 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3294 PTA_IVYBRIDGE},
3295 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3296 PTA_IVYBRIDGE},
3297 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3298 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3299 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3300 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3301 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3302 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3303 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3304 {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL},
3305 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3306 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3307 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3308 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3309 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3310 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3311 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3312 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3313 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3314 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3315 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3316 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3317 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3318 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3319 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3320 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3321 {"x86-64", PROCESSOR_K8, CPU_K8,
3322 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3323 {"k8", PROCESSOR_K8, CPU_K8,
3324 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3325 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3326 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3327 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3328 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3329 {"opteron", PROCESSOR_K8, CPU_K8,
3330 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3331 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3332 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3333 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3334 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3335 {"athlon64", PROCESSOR_K8, CPU_K8,
3336 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3337 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3338 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3339 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3340 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3341 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3342 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3343 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3344 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3345 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3346 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3347 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3348 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3349 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3350 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3351 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3352 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3353 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3354 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3355 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3356 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3357 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3358 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3359 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3360 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3361 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3362 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3363 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3364 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3365 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3366 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3367 | PTA_XSAVEOPT | PTA_FSGSBASE},
3368 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3369 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3370 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3371 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3372 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3373 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3374 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3375 | PTA_MOVBE},
3376 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3377 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3378 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3379 | PTA_FXSR | PTA_XSAVE},
3380 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3381 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3382 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3383 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3384 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3385 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3387 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3388 PTA_64BIT
3389 | PTA_HLE /* flags are only used for -march switch. */ },
3392 /* -mrecip options. */
3393 static struct
3395 const char *string; /* option name */
3396 unsigned int mask; /* mask bits to set */
3398 const recip_options[] =
3400 { "all", RECIP_MASK_ALL },
3401 { "none", RECIP_MASK_NONE },
3402 { "div", RECIP_MASK_DIV },
3403 { "sqrt", RECIP_MASK_SQRT },
3404 { "vec-div", RECIP_MASK_VEC_DIV },
3405 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3408 int const pta_size = ARRAY_SIZE (processor_alias_table);
3410 /* Set up prefix/suffix so the error messages refer to either the command
3411 line argument, or the attribute(target). */
3412 if (main_args_p)
3414 prefix = "-m";
3415 suffix = "";
3416 sw = "switch";
3418 else
3420 prefix = "option(\"";
3421 suffix = "\")";
3422 sw = "attribute";
3425 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3426 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3427 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3428 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3429 #ifdef TARGET_BI_ARCH
3430 else
3432 #if TARGET_BI_ARCH == 1
3433 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3434 is on and OPTION_MASK_ABI_X32 is off. We turn off
3435 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3436 -mx32. */
3437 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3438 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3439 #else
3440 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3441 on and OPTION_MASK_ABI_64 is off. We turn off
3442 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3443 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3444 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3445 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3446 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3447 #endif
3449 #endif
3451 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3453 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3454 OPTION_MASK_ABI_64 for TARGET_X32. */
3455 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3456 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3458 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3459 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3460 | OPTION_MASK_ABI_X32
3461 | OPTION_MASK_ABI_64);
3462 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3464 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3465 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3466 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3467 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3470 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3471 SUBTARGET_OVERRIDE_OPTIONS;
3472 #endif
3474 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3475 SUBSUBTARGET_OVERRIDE_OPTIONS;
3476 #endif
3478 /* -fPIC is the default for x86_64. */
3479 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3480 opts->x_flag_pic = 2;
3482 /* Need to check -mtune=generic first. */
3483 if (opts->x_ix86_tune_string)
3485 /* As special support for cross compilers we read -mtune=native
3486 as -mtune=generic. With native compilers we won't see the
3487 -mtune=native, as it was changed by the driver. */
3488 if (!strcmp (opts->x_ix86_tune_string, "native"))
3490 opts->x_ix86_tune_string = "generic";
3492 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3493 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3494 "%stune=k8%s or %stune=generic%s instead as appropriate",
3495 prefix, suffix, prefix, suffix, prefix, suffix);
3497 else
3499 if (opts->x_ix86_arch_string)
3500 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3501 if (!opts->x_ix86_tune_string)
3503 opts->x_ix86_tune_string
3504 = processor_target_table[TARGET_CPU_DEFAULT].name;
3505 ix86_tune_defaulted = 1;
3508 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3509 or defaulted. We need to use a sensible tune option. */
3510 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3512 opts->x_ix86_tune_string = "generic";
3516 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3517 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3519 /* rep; movq isn't available in 32-bit code. */
3520 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3521 opts->x_ix86_stringop_alg = no_stringop;
3524 if (!opts->x_ix86_arch_string)
3525 opts->x_ix86_arch_string
3526 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3527 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3528 else
3529 ix86_arch_specified = 1;
3531 if (opts_set->x_ix86_pmode)
3533 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3534 && opts->x_ix86_pmode == PMODE_SI)
3535 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3536 && opts->x_ix86_pmode == PMODE_DI))
3537 error ("address mode %qs not supported in the %s bit mode",
3538 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3539 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3541 else
3542 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3543 ? PMODE_DI : PMODE_SI;
3545 if (!opts_set->x_ix86_abi)
3546 opts->x_ix86_abi = DEFAULT_ABI;
3548 /* For targets using the MS ABI enable ms-extensions, unless it was
3549 explicitly turned off. For non-MS ABI targets we turn this
3550 option off.  */
3551 if (!opts_set->x_flag_ms_extensions)
3552 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3554 if (opts_set->x_ix86_cmodel)
3556 switch (opts->x_ix86_cmodel)
3558 case CM_SMALL:
3559 case CM_SMALL_PIC:
3560 if (opts->x_flag_pic)
3561 opts->x_ix86_cmodel = CM_SMALL_PIC;
3562 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3563 error ("code model %qs not supported in the %s bit mode",
3564 "small", "32");
3565 break;
3567 case CM_MEDIUM:
3568 case CM_MEDIUM_PIC:
3569 if (opts->x_flag_pic)
3570 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3571 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3572 error ("code model %qs not supported in the %s bit mode",
3573 "medium", "32");
3574 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3575 error ("code model %qs not supported in x32 mode",
3576 "medium");
3577 break;
3579 case CM_LARGE:
3580 case CM_LARGE_PIC:
3581 if (opts->x_flag_pic)
3582 opts->x_ix86_cmodel = CM_LARGE_PIC;
3583 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3584 error ("code model %qs not supported in the %s bit mode",
3585 "large", "32");
3586 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3587 error ("code model %qs not supported in x32 mode",
3588 "large");
3589 break;
3591 case CM_32:
3592 if (opts->x_flag_pic)
3593 error ("code model %s does not support PIC mode", "32");
3594 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3595 error ("code model %qs not supported in the %s bit mode",
3596 "32", "64");
3597 break;
3599 case CM_KERNEL:
3600 if (opts->x_flag_pic)
3602 error ("code model %s does not support PIC mode", "kernel");
3603 opts->x_ix86_cmodel = CM_32;
3605 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3606 error ("code model %qs not supported in the %s bit mode",
3607 "kernel", "32");
3608 break;
3610 default:
3611 gcc_unreachable ();
3614 else
3616 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3617 use of rip-relative addressing. This eliminates fixups that
3618 would otherwise be needed if this object is to be placed in a
3619 DLL, and is essentially just as efficient as direct addressing. */
3620 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3621 && (TARGET_RDOS || TARGET_PECOFF))
3622 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3623 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3624 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3625 else
3626 opts->x_ix86_cmodel = CM_32;
3628 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3630 error ("-masm=intel not supported in this configuration");
3631 opts->x_ix86_asm_dialect = ASM_ATT;
3633 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3634 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3635 sorry ("%i-bit mode not compiled in",
3636 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3638 for (i = 0; i < pta_size; i++)
3639 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3641 ix86_schedule = processor_alias_table[i].schedule;
3642 ix86_arch = processor_alias_table[i].processor;
3643 /* Default cpu tuning to the architecture. */
3644 ix86_tune = ix86_arch;
3646 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3647 && !(processor_alias_table[i].flags & PTA_64BIT))
3648 error ("CPU you selected does not support x86-64 "
3649 "instruction set");
3651 if (processor_alias_table[i].flags & PTA_MMX
3652 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3653 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3654 if (processor_alias_table[i].flags & PTA_3DNOW
3655 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3656 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3657 if (processor_alias_table[i].flags & PTA_3DNOW_A
3658 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3659 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3660 if (processor_alias_table[i].flags & PTA_SSE
3661 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3662 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3663 if (processor_alias_table[i].flags & PTA_SSE2
3664 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3665 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3666 if (processor_alias_table[i].flags & PTA_SSE3
3667 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3668 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3669 if (processor_alias_table[i].flags & PTA_SSSE3
3670 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3671 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3672 if (processor_alias_table[i].flags & PTA_SSE4_1
3673 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3674 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3675 if (processor_alias_table[i].flags & PTA_SSE4_2
3676 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3677 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3678 if (processor_alias_table[i].flags & PTA_AVX
3679 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3680 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3681 if (processor_alias_table[i].flags & PTA_AVX2
3682 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3683 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3684 if (processor_alias_table[i].flags & PTA_FMA
3685 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3686 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3687 if (processor_alias_table[i].flags & PTA_SSE4A
3688 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3689 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3690 if (processor_alias_table[i].flags & PTA_FMA4
3691 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3692 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3693 if (processor_alias_table[i].flags & PTA_XOP
3694 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3695 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3696 if (processor_alias_table[i].flags & PTA_LWP
3697 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3698 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3699 if (processor_alias_table[i].flags & PTA_ABM
3700 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3701 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3702 if (processor_alias_table[i].flags & PTA_BMI
3703 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3704 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3705 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3706 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3707 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3708 if (processor_alias_table[i].flags & PTA_TBM
3709 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3710 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3711 if (processor_alias_table[i].flags & PTA_BMI2
3712 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3713 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3714 if (processor_alias_table[i].flags & PTA_CX16
3715 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3716 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3717 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3718 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3719 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3720 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3721 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3722 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3723 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3724 if (processor_alias_table[i].flags & PTA_MOVBE
3725 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3726 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3727 if (processor_alias_table[i].flags & PTA_AES
3728 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3729 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3730 if (processor_alias_table[i].flags & PTA_SHA
3731 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3732 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3733 if (processor_alias_table[i].flags & PTA_PCLMUL
3734 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3735 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3736 if (processor_alias_table[i].flags & PTA_FSGSBASE
3737 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3738 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3739 if (processor_alias_table[i].flags & PTA_RDRND
3740 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3741 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3742 if (processor_alias_table[i].flags & PTA_F16C
3743 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3744 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3745 if (processor_alias_table[i].flags & PTA_RTM
3746 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3747 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3748 if (processor_alias_table[i].flags & PTA_HLE
3749 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3750 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3751 if (processor_alias_table[i].flags & PTA_PRFCHW
3752 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3753 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3754 if (processor_alias_table[i].flags & PTA_RDSEED
3755 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3756 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3757 if (processor_alias_table[i].flags & PTA_ADX
3758 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3759 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3760 if (processor_alias_table[i].flags & PTA_FXSR
3761 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3762 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3763 if (processor_alias_table[i].flags & PTA_XSAVE
3764 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3765 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3766 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3767 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3768 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3769 if (processor_alias_table[i].flags & PTA_AVX512F
3770 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3771 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3772 if (processor_alias_table[i].flags & PTA_AVX512ER
3773 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3774 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3775 if (processor_alias_table[i].flags & PTA_AVX512PF
3776 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3777 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3778 if (processor_alias_table[i].flags & PTA_AVX512CD
3779 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3780 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3781 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3782 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3783 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3784 if (processor_alias_table[i].flags & PTA_PCOMMIT
3785 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3786 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3787 if (processor_alias_table[i].flags & PTA_CLWB
3788 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3789 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3790 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3791 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3792 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3793 if (processor_alias_table[i].flags & PTA_XSAVEC
3794 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3795 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3796 if (processor_alias_table[i].flags & PTA_XSAVES
3797 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3798 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3799 if (processor_alias_table[i].flags & PTA_AVX512DQ
3800 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3801 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3802 if (processor_alias_table[i].flags & PTA_AVX512BW
3803 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3804 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3805 if (processor_alias_table[i].flags & PTA_AVX512VL
3806 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3807 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3808 if (processor_alias_table[i].flags & PTA_MPX
3809 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3810 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3811 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3812 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3813 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3814 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3815 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3816 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3817 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3818 x86_prefetch_sse = true;
3820 break;
3823 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3824 error ("Intel MPX does not support x32");
3829 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3830 error ("generic CPU can be used only for %stune=%s %s",
3831 prefix, suffix, sw);
3832 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3833 error ("intel CPU can be used only for %stune=%s %s",
3834 prefix, suffix, sw);
3835 else if (i == pta_size)
3836 error ("bad value (%s) for %sarch=%s %s",
3837 opts->x_ix86_arch_string, prefix, suffix, sw);
3839 ix86_arch_mask = 1u << ix86_arch;
3840 for (i = 0; i < X86_ARCH_LAST; ++i)
3841 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3843 for (i = 0; i < pta_size; i++)
3844 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3846 ix86_schedule = processor_alias_table[i].schedule;
3847 ix86_tune = processor_alias_table[i].processor;
3848 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3850 if (!(processor_alias_table[i].flags & PTA_64BIT))
3852 if (ix86_tune_defaulted)
3854 opts->x_ix86_tune_string = "x86-64";
3855 for (i = 0; i < pta_size; i++)
3856 if (! strcmp (opts->x_ix86_tune_string,
3857 processor_alias_table[i].name))
3858 break;
3859 ix86_schedule = processor_alias_table[i].schedule;
3860 ix86_tune = processor_alias_table[i].processor;
3862 else
3863 error ("CPU you selected does not support x86-64 "
3864 "instruction set");
3867 /* Intel CPUs have always interpreted SSE prefetch instructions as
3868 NOPs; so, we can enable SSE prefetch instructions even when
3869 -mtune (rather than -march) points us to a processor that has them.
3870 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3871 higher processors. */
3872 if (TARGET_CMOV
3873 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3874 x86_prefetch_sse = true;
3875 break;
3878 if (ix86_tune_specified && i == pta_size)
3879 error ("bad value (%s) for %stune=%s %s",
3880 opts->x_ix86_tune_string, prefix, suffix, sw);
3882 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3884 #ifndef USE_IX86_FRAME_POINTER
3885 #define USE_IX86_FRAME_POINTER 0
3886 #endif
3888 #ifndef USE_X86_64_FRAME_POINTER
3889 #define USE_X86_64_FRAME_POINTER 0
3890 #endif
3892 /* Set the default values for switches whose default depends on TARGET_64BIT
3893 in case they weren't overwritten by command line options. */
3894 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3896 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3897 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3898 if (opts->x_flag_asynchronous_unwind_tables
3899 && !opts_set->x_flag_unwind_tables
3900 && TARGET_64BIT_MS_ABI)
3901 opts->x_flag_unwind_tables = 1;
3902 if (opts->x_flag_asynchronous_unwind_tables == 2)
3903 opts->x_flag_unwind_tables
3904 = opts->x_flag_asynchronous_unwind_tables = 1;
3905 if (opts->x_flag_pcc_struct_return == 2)
3906 opts->x_flag_pcc_struct_return = 0;
3908 else
3910 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3911 opts->x_flag_omit_frame_pointer
3912 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3913 if (opts->x_flag_asynchronous_unwind_tables == 2)
3914 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3915 if (opts->x_flag_pcc_struct_return == 2)
3916 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3919 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3920 /* TODO: ix86_cost should be chosen at instruction or function granularity
3921 so that for cold code we would use size_cost even in !optimize_size compilations.  */
3922 if (opts->x_optimize_size)
3923 ix86_cost = &ix86_size_cost;
3924 else
3925 ix86_cost = ix86_tune_cost;
3927 /* Arrange to set up i386_stack_locals for all functions. */
3928 init_machine_status = ix86_init_machine_status;
3930 /* Validate -mregparm= value. */
3931 if (opts_set->x_ix86_regparm)
3933 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3934 warning (0, "-mregparm is ignored in 64-bit mode");
3935 if (opts->x_ix86_regparm > REGPARM_MAX)
3937 error ("-mregparm=%d is not between 0 and %d",
3938 opts->x_ix86_regparm, REGPARM_MAX);
3939 opts->x_ix86_regparm = 0;
3942 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3943 opts->x_ix86_regparm = REGPARM_MAX;
3945 /* Default align_* from the processor table. */
3946 ix86_default_align (opts);
3948 /* Provide default for -mbranch-cost= value. */
3949 if (!opts_set->x_ix86_branch_cost)
3950 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
3952 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3954 opts->x_target_flags
3955 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3957 /* Enable by default the SSE and MMX builtins. Do allow the user to
3958 explicitly disable any of these. In particular, disabling SSE and
3959 MMX for kernel code is extremely useful. */
3960 if (!ix86_arch_specified)
3961 opts->x_ix86_isa_flags
3962 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3963 | TARGET_SUBTARGET64_ISA_DEFAULT)
3964 & ~opts->x_ix86_isa_flags_explicit);
3966 if (TARGET_RTD_P (opts->x_target_flags))
3967 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3969 else
3971 opts->x_target_flags
3972 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3974 if (!ix86_arch_specified)
3975 opts->x_ix86_isa_flags
3976 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3978 /* The i386 ABI does not specify a red zone. It still makes sense to use one
3979 when the programmer takes care to keep the stack from being destroyed.  */
3980 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3981 opts->x_target_flags |= MASK_NO_RED_ZONE;
3984 /* Keep nonleaf frame pointers. */
3985 if (opts->x_flag_omit_frame_pointer)
3986 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3987 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3988 opts->x_flag_omit_frame_pointer = 1;
3990 /* If we're doing fast math, we don't care about comparison order
3991 wrt NaNs. This lets us use a shorter comparison sequence. */
3992 if (opts->x_flag_finite_math_only)
3993 opts->x_target_flags &= ~MASK_IEEE_FP;
3995 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3996 since the insns won't need emulation. */
3997 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3998 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
4000 /* Likewise, if the target doesn't have a 387, or we've specified
4001 software floating point, don't use 387 inline intrinsics. */
4002 if (!TARGET_80387_P (opts->x_target_flags))
4003 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
4005 /* Turn on MMX builtins for -msse. */
4006 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
4007 opts->x_ix86_isa_flags
4008 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
4010 /* Enable SSE prefetch. */
4011 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
4012 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
4013 x86_prefetch_sse = true;
4015 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
4016 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
4017 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
4018 opts->x_ix86_isa_flags
4019 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
4021 /* Enable popcnt instruction for -msse4.2 or -mabm. */
4022 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
4023 || TARGET_ABM_P (opts->x_ix86_isa_flags))
4024 opts->x_ix86_isa_flags
4025 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4027 /* Enable lzcnt instruction for -mabm. */
4028 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
4029 opts->x_ix86_isa_flags
4030 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4032 /* Validate -mpreferred-stack-boundary= value or default it to
4033 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4034 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4035 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4037 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4038 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4039 int max = (TARGET_SEH ? 4 : 12);
4041 if (opts->x_ix86_preferred_stack_boundary_arg < min
4042 || opts->x_ix86_preferred_stack_boundary_arg > max)
4044 if (min == max)
4045 error ("-mpreferred-stack-boundary is not supported "
4046 "for this target");
4047 else
4048 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4049 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4051 else
4052 ix86_preferred_stack_boundary
4053 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4056 /* Set the default value for -mstackrealign. */
4057 if (opts->x_ix86_force_align_arg_pointer == -1)
4058 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4060 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4062 /* Validate -mincoming-stack-boundary= value or default it to
4063 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4064 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4065 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4067 if (opts->x_ix86_incoming_stack_boundary_arg
4068 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
4069 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4070 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4071 opts->x_ix86_incoming_stack_boundary_arg,
4072 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
4073 else
4075 ix86_user_incoming_stack_boundary
4076 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4077 ix86_incoming_stack_boundary
4078 = ix86_user_incoming_stack_boundary;
4082 #ifndef NO_PROFILE_COUNTERS
4083 if (flag_nop_mcount)
4084 error ("-mnop-mcount is not compatible with this target");
4085 #endif
4086 if (flag_nop_mcount && flag_pic)
4087 error ("-mnop-mcount is not implemented for -fPIC");
4089 /* Accept -msseregparm only if at least SSE support is enabled. */
4090 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4091 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4092 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4094 if (opts_set->x_ix86_fpmath)
4096 if (opts->x_ix86_fpmath & FPMATH_SSE)
4098 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4100 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4101 opts->x_ix86_fpmath = FPMATH_387;
4103 else if ((opts->x_ix86_fpmath & FPMATH_387)
4104 && !TARGET_80387_P (opts->x_target_flags))
4106 warning (0, "387 instruction set disabled, using SSE arithmetics");
4107 opts->x_ix86_fpmath = FPMATH_SSE;
4111 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4112 fpmath=387. The latter is nevertheless the default on many targets, since the
4113 extra 80-bit precision of temporaries is considered to be part of the ABI.
4114 Overwrite the default at least for -ffast-math.
4115 TODO: -mfpmath=both seems to produce similarly performing code with slightly
4116 smaller binaries. It is however not clear whether register allocation is
4117 ready for this setting.
4118 Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4119 codegen. We may switch to 387 with -ffast-math for size-optimized
4120 functions.  */
4121 else if (fast_math_flags_set_p (&global_options)
4122 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4123 opts->x_ix86_fpmath = FPMATH_SSE;
4124 else
4125 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4127 /* If the i387 is disabled, then do not return values in it. */
4128 if (!TARGET_80387_P (opts->x_target_flags))
4129 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4131 /* Use external vectorized library in vectorizing intrinsics. */
4132 if (opts_set->x_ix86_veclibabi_type)
4133 switch (opts->x_ix86_veclibabi_type)
4135 case ix86_veclibabi_type_svml:
4136 ix86_veclib_handler = ix86_veclibabi_svml;
4137 break;
4139 case ix86_veclibabi_type_acml:
4140 ix86_veclib_handler = ix86_veclibabi_acml;
4141 break;
4143 default:
4144 gcc_unreachable ();
4147 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4148 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4149 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4151 /* If stack probes are required, the space used for large function
4152 arguments on the stack must also be probed, so enable
4153 -maccumulate-outgoing-args so this happens in the prologue. */
4154 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4155 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4157 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4158 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4159 "for correctness", prefix, suffix);
4160 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4163 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4165 char *p;
4166 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4167 p = strchr (internal_label_prefix, 'X');
4168 internal_label_prefix_len = p - internal_label_prefix;
4169 *p = '\0';
4172 /* When scheduling description is not available, disable scheduler pass
4173 so it won't slow down the compilation and make x87 code slower. */
4174 if (!TARGET_SCHEDULE)
4175 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4177 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4178 ix86_tune_cost->simultaneous_prefetches,
4179 opts->x_param_values,
4180 opts_set->x_param_values);
4181 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4182 ix86_tune_cost->prefetch_block,
4183 opts->x_param_values,
4184 opts_set->x_param_values);
4185 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4186 ix86_tune_cost->l1_cache_size,
4187 opts->x_param_values,
4188 opts_set->x_param_values);
4189 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4190 ix86_tune_cost->l2_cache_size,
4191 opts->x_param_values,
4192 opts_set->x_param_values);
4194 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful.  */
4195 if (opts->x_flag_prefetch_loop_arrays < 0
4196 && HAVE_prefetch
4197 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4198 && !opts->x_optimize_size
4199 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4200 opts->x_flag_prefetch_loop_arrays = 1;
4202 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4203 can be optimized to ap = __builtin_next_arg (0).  */
4204 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4205 targetm.expand_builtin_va_start = NULL;
4207 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4209 ix86_gen_leave = gen_leave_rex64;
4210 if (Pmode == DImode)
4212 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4213 ix86_gen_tls_local_dynamic_base_64
4214 = gen_tls_local_dynamic_base_64_di;
4216 else
4218 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4219 ix86_gen_tls_local_dynamic_base_64
4220 = gen_tls_local_dynamic_base_64_si;
4223 else
4224 ix86_gen_leave = gen_leave;
4226 if (Pmode == DImode)
4228 ix86_gen_add3 = gen_adddi3;
4229 ix86_gen_sub3 = gen_subdi3;
4230 ix86_gen_sub3_carry = gen_subdi3_carry;
4231 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4232 ix86_gen_andsp = gen_anddi3;
4233 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4234 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4235 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4236 ix86_gen_monitor = gen_sse3_monitor_di;
4238 else
4240 ix86_gen_add3 = gen_addsi3;
4241 ix86_gen_sub3 = gen_subsi3;
4242 ix86_gen_sub3_carry = gen_subsi3_carry;
4243 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4244 ix86_gen_andsp = gen_andsi3;
4245 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4246 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4247 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4248 ix86_gen_monitor = gen_sse3_monitor_si;
4251 #ifdef USE_IX86_CLD
4252 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4253 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4254 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4255 #endif
4257 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4259 if (opts->x_flag_fentry > 0)
4260 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4261 "with -fpic");
4262 opts->x_flag_fentry = 0;
4264 else if (TARGET_SEH)
4266 if (opts->x_flag_fentry == 0)
4267 sorry ("-mno-fentry isn%'t compatible with SEH");
4268 opts->x_flag_fentry = 1;
4270 else if (opts->x_flag_fentry < 0)
4272 #if defined(PROFILE_BEFORE_PROLOGUE)
4273 opts->x_flag_fentry = 1;
4274 #else
4275 opts->x_flag_fentry = 0;
4276 #endif
4279 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4280 opts->x_target_flags |= MASK_VZEROUPPER;
4281 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4282 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4283 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4284 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4285 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4286 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4287 /* Enable 128-bit AVX instruction generation
4288 for the auto-vectorizer. */
4289 if (TARGET_AVX128_OPTIMAL
4290 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4291 opts->x_target_flags |= MASK_PREFER_AVX128;
4293 if (opts->x_ix86_recip_name)
4295 char *p = ASTRDUP (opts->x_ix86_recip_name);
4296 char *q;
4297 unsigned int mask, i;
4298 bool invert;
4300 while ((q = strtok (p, ",")) != NULL)
4302 p = NULL;
4303 if (*q == '!')
4305 invert = true;
4306 q++;
4308 else
4309 invert = false;
4311 if (!strcmp (q, "default"))
4312 mask = RECIP_MASK_ALL;
4313 else
4315 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4316 if (!strcmp (q, recip_options[i].string))
4318 mask = recip_options[i].mask;
4319 break;
4322 if (i == ARRAY_SIZE (recip_options))
4324 error ("unknown option for -mrecip=%s", q);
4325 invert = false;
4326 mask = RECIP_MASK_NONE;
4330 opts->x_recip_mask_explicit |= mask;
4331 if (invert)
4332 opts->x_recip_mask &= ~mask;
4333 else
4334 opts->x_recip_mask |= mask;
4338 if (TARGET_RECIP_P (opts->x_target_flags))
4339 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4340 else if (opts_set->x_target_flags & MASK_RECIP)
4341 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4343 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4344 for 64-bit Bionic. */
4345 if (TARGET_HAS_BIONIC
4346 && !(opts_set->x_target_flags
4347 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4348 opts->x_target_flags |= (TARGET_64BIT
4349 ? MASK_LONG_DOUBLE_128
4350 : MASK_LONG_DOUBLE_64);
4352 /* Only one of them can be active. */
4353 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4354 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4356 /* Save the initial options in case the user does function specific
4357 options. */
4358 if (main_args_p)
4359 target_option_default_node = target_option_current_node
4360 = build_target_option_node (opts);
4362 /* Handle stack protector */
4363 if (!opts_set->x_ix86_stack_protector_guard)
4364 opts->x_ix86_stack_protector_guard
4365 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4367 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4368 if (opts->x_ix86_tune_memcpy_strategy)
4370 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4371 ix86_parse_stringop_strategy_string (str, false);
4372 free (str);
4375 if (opts->x_ix86_tune_memset_strategy)
4377 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4378 ix86_parse_stringop_strategy_string (str, true);
4379 free (str);
4383 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4385 static void
4386 ix86_option_override (void)
4388 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4389 struct register_pass_info insert_vzeroupper_info
4390 = { pass_insert_vzeroupper, "reload",
4391 1, PASS_POS_INSERT_AFTER
4394 ix86_option_override_internal (true, &global_options, &global_options_set);
4397 /* This needs to be done at start up. It's convenient to do it here. */
4398 register_pass (&insert_vzeroupper_info);
4401 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4402 static char *
4403 ix86_offload_options (void)
4405 if (TARGET_LP64)
4406 return xstrdup ("-foffload-abi=lp64");
4407 return xstrdup ("-foffload-abi=ilp32");
4410 /* Update register usage after having seen the compiler flags. */
4412 static void
4413 ix86_conditional_register_usage (void)
4415 int i, c_mask;
4417 /* For 32-bit targets, squash the REX registers. */
4418 if (! TARGET_64BIT)
4420 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4421 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4422 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4423 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4424 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4425 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4428 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4429 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4430 : TARGET_64BIT ? (1 << 2)
4431 : (1 << 1));
4433 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4435 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4437 /* Set/reset conditionally defined registers from
4438 CALL_USED_REGISTERS initializer. */
4439 if (call_used_regs[i] > 1)
4440 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4442 /* Calculate registers of CLOBBERED_REGS register set
4443 as call used registers from GENERAL_REGS register set. */
4444 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4445 && call_used_regs[i])
4446 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4449 /* If MMX is disabled, squash the registers. */
4450 if (! TARGET_MMX)
4451 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4452 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4453 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4455 /* If SSE is disabled, squash the registers. */
4456 if (! TARGET_SSE)
4457 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4458 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4459 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4461 /* If the FPU is disabled, squash the registers. */
4462 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4463 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4464 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4465 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4467 /* If AVX512F is disabled, squash the registers. */
4468 if (! TARGET_AVX512F)
4470 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4471 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4473 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4474 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4477 /* If MPX is disabled, squash the registers. */
4478 if (! TARGET_MPX)
4479 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4480 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4484 /* Save the current options */
4486 static void
4487 ix86_function_specific_save (struct cl_target_option *ptr,
4488 struct gcc_options *opts)
4490 ptr->arch = ix86_arch;
4491 ptr->schedule = ix86_schedule;
4492 ptr->prefetch_sse = x86_prefetch_sse;
4493 ptr->tune = ix86_tune;
4494 ptr->branch_cost = ix86_branch_cost;
4495 ptr->tune_defaulted = ix86_tune_defaulted;
4496 ptr->arch_specified = ix86_arch_specified;
4497 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4498 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4499 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4500 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4501 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4502 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4503 ptr->x_ix86_abi = opts->x_ix86_abi;
4504 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4505 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4506 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4507 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4508 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4509 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4510 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4511 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4512 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4513 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4514 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4515 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4516 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4517 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4518 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4519 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4520 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4521 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4522 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4523 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4525 /* The fields are char but the variables are not; make sure the
4526 values fit in the fields. */
4527 gcc_assert (ptr->arch == ix86_arch);
4528 gcc_assert (ptr->schedule == ix86_schedule);
4529 gcc_assert (ptr->tune == ix86_tune);
4530 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4533 /* Restore the current options */
4535 static void
4536 ix86_function_specific_restore (struct gcc_options *opts,
4537 struct cl_target_option *ptr)
4539 enum processor_type old_tune = ix86_tune;
4540 enum processor_type old_arch = ix86_arch;
4541 unsigned int ix86_arch_mask;
4542 int i;
4544 /* We don't change -fPIC. */
4545 opts->x_flag_pic = flag_pic;
4547 ix86_arch = (enum processor_type) ptr->arch;
4548 ix86_schedule = (enum attr_cpu) ptr->schedule;
4549 ix86_tune = (enum processor_type) ptr->tune;
4550 x86_prefetch_sse = ptr->prefetch_sse;
4551 opts->x_ix86_branch_cost = ptr->branch_cost;
4552 ix86_tune_defaulted = ptr->tune_defaulted;
4553 ix86_arch_specified = ptr->arch_specified;
4554 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4555 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4556 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4557 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4558 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4559 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4560 opts->x_ix86_abi = ptr->x_ix86_abi;
4561 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4562 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4563 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4564 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4565 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4566 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4567 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4568 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4569 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4570 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4571 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4572 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4573 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4574 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4575 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4576 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4577 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4578 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4579 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4580 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4581 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4582 /* TODO: ix86_cost should be chosen at instruction or function granularity,
4583 so for cold code we use size_cost even in !optimize_size compilation. */
4584 if (opts->x_optimize_size)
4585 ix86_cost = &ix86_size_cost;
4586 else
4587 ix86_cost = ix86_tune_cost;
4589 /* Recreate the arch feature tests if the arch changed */
4590 if (old_arch != ix86_arch)
4592 ix86_arch_mask = 1u << ix86_arch;
4593 for (i = 0; i < X86_ARCH_LAST; ++i)
4594 ix86_arch_features[i]
4595 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4598 /* Recreate the tune optimization tests */
4599 if (old_tune != ix86_tune)
4600 set_ix86_tune_features (ix86_tune, false);
4603 /* Adjust target options after streaming them in. This is mainly about
4604 reconciling them with global options. */
4606 static void
4607 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
4609 /* flag_pic is a global option, but ix86_cmodel is a target-saved option
4610 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
4611 for PIC, or error out. */
4612 if (flag_pic)
4613 switch (ptr->x_ix86_cmodel)
4615 case CM_SMALL:
4616 ptr->x_ix86_cmodel = CM_SMALL_PIC;
4617 break;
4619 case CM_MEDIUM:
4620 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
4621 break;
4623 case CM_LARGE:
4624 ptr->x_ix86_cmodel = CM_LARGE_PIC;
4625 break;
4627 case CM_KERNEL:
4628 error ("code model %s does not support PIC mode", "kernel");
4629 break;
4631 default:
4632 break;
4634 else
4635 switch (ptr->x_ix86_cmodel)
4637 case CM_SMALL_PIC:
4638 ptr->x_ix86_cmodel = CM_SMALL;
4639 break;
4641 case CM_MEDIUM_PIC:
4642 ptr->x_ix86_cmodel = CM_MEDIUM;
4643 break;
4645 case CM_LARGE_PIC:
4646 ptr->x_ix86_cmodel = CM_LARGE;
4647 break;
4649 default:
4650 break;
4654 /* Print the current options */
4656 static void
4657 ix86_function_specific_print (FILE *file, int indent,
4658 struct cl_target_option *ptr)
4660 char *target_string
4661 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4662 NULL, NULL, ptr->x_ix86_fpmath, false);
4664 gcc_assert (ptr->arch < PROCESSOR_max);
4665 fprintf (file, "%*sarch = %d (%s)\n",
4666 indent, "",
4667 ptr->arch, processor_target_table[ptr->arch].name);
4669 gcc_assert (ptr->tune < PROCESSOR_max);
4670 fprintf (file, "%*stune = %d (%s)\n",
4671 indent, "",
4672 ptr->tune, processor_target_table[ptr->tune].name);
4674 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4676 if (target_string)
4678 fprintf (file, "%*s%s\n", indent, "", target_string);
4679 free (target_string);
4684 /* Inner function to process the attribute((target(...))); it takes an argument
4685 and sets the current options from that argument. If we have a list,
4686 recursively go over the list. */
4688 static bool
4689 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4690 struct gcc_options *opts,
4691 struct gcc_options *opts_set,
4692 struct gcc_options *enum_opts_set)
4694 char *next_optstr;
4695 bool ret = true;
4697 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4698 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4699 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4700 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4701 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4703 enum ix86_opt_type
4705 ix86_opt_unknown,
4706 ix86_opt_yes,
4707 ix86_opt_no,
4708 ix86_opt_str,
4709 ix86_opt_enum,
4710 ix86_opt_isa
4713 static const struct
4715 const char *string;
4716 size_t len;
4717 enum ix86_opt_type type;
4718 int opt;
4719 int mask;
4720 } attrs[] = {
4721 /* isa options */
4722 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4723 IX86_ATTR_ISA ("abm", OPT_mabm),
4724 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4725 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4726 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4727 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4728 IX86_ATTR_ISA ("aes", OPT_maes),
4729 IX86_ATTR_ISA ("sha", OPT_msha),
4730 IX86_ATTR_ISA ("avx", OPT_mavx),
4731 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4732 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4733 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4734 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4735 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4736 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4737 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4738 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4739 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4740 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4741 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4742 IX86_ATTR_ISA ("sse", OPT_msse),
4743 IX86_ATTR_ISA ("sse2", OPT_msse2),
4744 IX86_ATTR_ISA ("sse3", OPT_msse3),
4745 IX86_ATTR_ISA ("sse4", OPT_msse4),
4746 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4747 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4748 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4749 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4750 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4751 IX86_ATTR_ISA ("fma", OPT_mfma),
4752 IX86_ATTR_ISA ("xop", OPT_mxop),
4753 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4754 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4755 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4756 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4757 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4758 IX86_ATTR_ISA ("hle", OPT_mhle),
4759 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4760 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4761 IX86_ATTR_ISA ("adx", OPT_madx),
4762 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4763 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4764 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4765 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4766 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4767 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4768 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4769 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4770 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4771 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4772 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
4774 /* enum options */
4775 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4777 /* string options */
4778 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4779 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4781 /* flag options */
4782 IX86_ATTR_YES ("cld",
4783 OPT_mcld,
4784 MASK_CLD),
4786 IX86_ATTR_NO ("fancy-math-387",
4787 OPT_mfancy_math_387,
4788 MASK_NO_FANCY_MATH_387),
4790 IX86_ATTR_YES ("ieee-fp",
4791 OPT_mieee_fp,
4792 MASK_IEEE_FP),
4794 IX86_ATTR_YES ("inline-all-stringops",
4795 OPT_minline_all_stringops,
4796 MASK_INLINE_ALL_STRINGOPS),
4798 IX86_ATTR_YES ("inline-stringops-dynamically",
4799 OPT_minline_stringops_dynamically,
4800 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4802 IX86_ATTR_NO ("align-stringops",
4803 OPT_mno_align_stringops,
4804 MASK_NO_ALIGN_STRINGOPS),
4806 IX86_ATTR_YES ("recip",
4807 OPT_mrecip,
4808 MASK_RECIP),
4812 /* If this is a list, recurse to get the options. */
4813 if (TREE_CODE (args) == TREE_LIST)
4815 bool ret = true;
4817 for (; args; args = TREE_CHAIN (args))
4818 if (TREE_VALUE (args)
4819 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4820 p_strings, opts, opts_set,
4821 enum_opts_set))
4822 ret = false;
4824 return ret;
4827 else if (TREE_CODE (args) != STRING_CST)
4829 error ("attribute %<target%> argument not a string");
4830 return false;
4833 /* Handle multiple arguments separated by commas. */
4834 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4836 while (next_optstr && *next_optstr != '\0')
4838 char *p = next_optstr;
4839 char *orig_p = p;
4840 char *comma = strchr (next_optstr, ',');
4841 const char *opt_string;
4842 size_t len, opt_len;
4843 int opt;
4844 bool opt_set_p;
4845 char ch;
4846 unsigned i;
4847 enum ix86_opt_type type = ix86_opt_unknown;
4848 int mask = 0;
4850 if (comma)
4852 *comma = '\0';
4853 len = comma - next_optstr;
4854 next_optstr = comma + 1;
4856 else
4858 len = strlen (p);
4859 next_optstr = NULL;
4862 /* Recognize no-xxx. */
4863 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4865 opt_set_p = false;
4866 p += 3;
4867 len -= 3;
4869 else
4870 opt_set_p = true;
4872 /* Find the option. */
4873 ch = *p;
4874 opt = N_OPTS;
4875 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4877 type = attrs[i].type;
4878 opt_len = attrs[i].len;
4879 if (ch == attrs[i].string[0]
4880 && ((type != ix86_opt_str && type != ix86_opt_enum)
4881 ? len == opt_len
4882 : len > opt_len)
4883 && memcmp (p, attrs[i].string, opt_len) == 0)
4885 opt = attrs[i].opt;
4886 mask = attrs[i].mask;
4887 opt_string = attrs[i].string;
4888 break;
4892 /* Process the option. */
4893 if (opt == N_OPTS)
4895 error ("attribute(target(\"%s\")) is unknown", orig_p);
4896 ret = false;
4899 else if (type == ix86_opt_isa)
4901 struct cl_decoded_option decoded;
4903 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4904 ix86_handle_option (opts, opts_set,
4905 &decoded, input_location);
4908 else if (type == ix86_opt_yes || type == ix86_opt_no)
4910 if (type == ix86_opt_no)
4911 opt_set_p = !opt_set_p;
4913 if (opt_set_p)
4914 opts->x_target_flags |= mask;
4915 else
4916 opts->x_target_flags &= ~mask;
4919 else if (type == ix86_opt_str)
4921 if (p_strings[opt])
4923 error ("option(\"%s\") was already specified", opt_string);
4924 ret = false;
4926 else
4927 p_strings[opt] = xstrdup (p + opt_len);
4930 else if (type == ix86_opt_enum)
4932 bool arg_ok;
4933 int value;
4935 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4936 if (arg_ok)
4937 set_option (opts, enum_opts_set, opt, value,
4938 p + opt_len, DK_UNSPECIFIED, input_location,
4939 global_dc);
4940 else
4942 error ("attribute(target(\"%s\")) is unknown", orig_p);
4943 ret = false;
4947 else
4948 gcc_unreachable ();
4951 return ret;
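
Illustrative aside (not part of i386.c): the comma-separated strings parsed by this routine are the same ones a user writes in the target attribute. A minimal sketch with hypothetical function names, using only option strings present in the attrs table above:

__attribute__((target ("avx2,fma,no-sse4a")))
static int
dot_avx2 (const int *a, const int *b, int n)
{
  int s = 0;
  for (int i = 0; i < n; i++)
    s += a[i] * b[i];          /* eligible for AVX2/FMA code generation */
  return s;
}

__attribute__((target ("arch=core2,tune=generic,fpmath=sse")))
static double
scale (double x)
{
  return x * 0.5;              /* string and enum options: arch=, tune=, fpmath= */
}
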
4954 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4956 tree
4957 ix86_valid_target_attribute_tree (tree args,
4958 struct gcc_options *opts,
4959 struct gcc_options *opts_set)
4961 const char *orig_arch_string = opts->x_ix86_arch_string;
4962 const char *orig_tune_string = opts->x_ix86_tune_string;
4963 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4964 int orig_tune_defaulted = ix86_tune_defaulted;
4965 int orig_arch_specified = ix86_arch_specified;
4966 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4967 tree t = NULL_TREE;
4968 int i;
4969 struct cl_target_option *def
4970 = TREE_TARGET_OPTION (target_option_default_node);
4971 struct gcc_options enum_opts_set;
4973 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4975 /* Process each of the options on the chain. */
4976 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4977 opts_set, &enum_opts_set))
4978 return error_mark_node;
4980 /* If the changed options are different from the default, rerun
4981 ix86_option_override_internal, and then save the options away.
4982 The string options are attribute options, and will be undone
4983 when we copy the save structure. */
4984 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4985 || opts->x_target_flags != def->x_target_flags
4986 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4987 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4988 || enum_opts_set.x_ix86_fpmath)
4990 /* If we are using the default tune= or arch=, undo the string assigned,
4991 and use the default. */
4992 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4993 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4994 else if (!orig_arch_specified)
4995 opts->x_ix86_arch_string = NULL;
4997 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4998 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4999 else if (orig_tune_defaulted)
5000 opts->x_ix86_tune_string = NULL;
5002 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
5003 if (enum_opts_set.x_ix86_fpmath)
5004 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5005 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
5006 && TARGET_SSE_P (opts->x_ix86_isa_flags))
5008 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
5009 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5012 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
5013 ix86_option_override_internal (false, opts, opts_set);
5015 /* Add any builtin functions with the new isa if any. */
5016 ix86_add_new_builtins (opts->x_ix86_isa_flags);
5018 /* Save the current options unless we are validating options for
5019 #pragma. */
5020 t = build_target_option_node (opts);
5022 opts->x_ix86_arch_string = orig_arch_string;
5023 opts->x_ix86_tune_string = orig_tune_string;
5024 opts_set->x_ix86_fpmath = orig_fpmath_set;
5026 /* Free up memory allocated to hold the strings */
5027 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5028 free (option_strings[i]);
5031 return t;
5034 /* Hook to validate attribute((target("string"))). */
5036 static bool
5037 ix86_valid_target_attribute_p (tree fndecl,
5038 tree ARG_UNUSED (name),
5039 tree args,
5040 int ARG_UNUSED (flags))
5042 struct gcc_options func_options;
5043 tree new_target, new_optimize;
5044 bool ret = true;
5046 /* attribute((target("default"))) does nothing, beyond
5047 affecting multi-versioning. */
5048 if (TREE_VALUE (args)
5049 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5050 && TREE_CHAIN (args) == NULL_TREE
5051 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5052 return true;
5054 tree old_optimize = build_optimization_node (&global_options);
5056 /* Get the optimization options of the current function. */
5057 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5059 if (!func_optimize)
5060 func_optimize = old_optimize;
5062 /* Init func_options. */
5063 memset (&func_options, 0, sizeof (func_options));
5064 init_options_struct (&func_options, NULL);
5065 lang_hooks.init_options_struct (&func_options);
5067 cl_optimization_restore (&func_options,
5068 TREE_OPTIMIZATION (func_optimize));
5070 /* Initialize func_options to the default before its target options can
5071 be set. */
5072 cl_target_option_restore (&func_options,
5073 TREE_TARGET_OPTION (target_option_default_node));
5075 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5076 &global_options_set);
5078 new_optimize = build_optimization_node (&func_options);
5080 if (new_target == error_mark_node)
5081 ret = false;
5083 else if (fndecl && new_target)
5085 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5087 if (old_optimize != new_optimize)
5088 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5091 return ret;
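
Illustrative aside (not part of i386.c): the same validation path also serves option pragmas, and target("default") is accepted only for its role in multi-versioning, as noted above. A small sketch, assuming GCC's push_options/target/pop_options pragmas; the function name is hypothetical:

#pragma GCC push_options
#pragma GCC target ("sse4.2,popcnt")

static int
count_bits (unsigned v)
{
  return __builtin_popcount (v);   /* may be emitted as a POPCNT instruction */
}

#pragma GCC pop_options
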
5095 /* Hook to determine if one function can safely inline another. */
5097 static bool
5098 ix86_can_inline_p (tree caller, tree callee)
5100 bool ret = false;
5101 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5102 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5104 /* If callee has no option attributes, then it is ok to inline. */
5105 if (!callee_tree)
5106 ret = true;
5108 /* If caller has no option attributes but callee does, then it is not ok to
5109 inline. */
5110 else if (!caller_tree)
5111 ret = false;
5113 else
5115 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5116 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5118 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
5119 can inline an SSE2 function, but an SSE2 function can't inline an SSE4
5120 function. */
5121 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5122 != callee_opts->x_ix86_isa_flags)
5123 ret = false;
5125 /* See if we have the same non-isa options. */
5126 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5127 ret = false;
5129 /* See if arch, tune, etc. are the same. */
5130 else if (caller_opts->arch != callee_opts->arch)
5131 ret = false;
5133 else if (caller_opts->tune != callee_opts->tune)
5134 ret = false;
5136 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5137 ret = false;
5139 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5140 ret = false;
5142 else
5143 ret = true;
5146 return ret;
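
Illustrative aside (not part of i386.c): a hedged sketch of the subset rule above, with made-up function names. The AVX2 callee cannot be inlined into a caller compiled for plain SSE2, while the reverse direction is allowed:

__attribute__((target ("avx2")))
static int
callee_avx2 (int x)
{
  return x * 3;
}

static int
caller_sse2 (int x)
{
  return callee_avx2 (x) + 1;   /* call is kept; the callee's ISA flags are wider */
}
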
5150 /* Remember the last target of ix86_set_current_function. */
5151 static GTY(()) tree ix86_previous_fndecl;
5153 /* Set targets globals to the default (or current #pragma GCC target
5154 if active). Invalidate ix86_previous_fndecl cache. */
5156 void
5157 ix86_reset_previous_fndecl (void)
5159 tree new_tree = target_option_current_node;
5160 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5161 if (TREE_TARGET_GLOBALS (new_tree))
5162 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5163 else if (new_tree == target_option_default_node)
5164 restore_target_globals (&default_target_globals);
5165 else
5166 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5167 ix86_previous_fndecl = NULL_TREE;
5170 /* Establish appropriate back-end context for processing the function
5171 FNDECL. The argument might be NULL to indicate processing at top
5172 level, outside of any function scope. */
5173 static void
5174 ix86_set_current_function (tree fndecl)
5176 /* Only change the context if the function changes. This hook is called
5177 several times in the course of compiling a function, and we don't want to
5178 slow things down too much or call target_reinit when it isn't safe. */
5179 if (fndecl == ix86_previous_fndecl)
5180 return;
5182 tree old_tree;
5183 if (ix86_previous_fndecl == NULL_TREE)
5184 old_tree = target_option_current_node;
5185 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5186 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5187 else
5188 old_tree = target_option_default_node;
5190 if (fndecl == NULL_TREE)
5192 if (old_tree != target_option_current_node)
5193 ix86_reset_previous_fndecl ();
5194 return;
5197 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5198 if (new_tree == NULL_TREE)
5199 new_tree = target_option_default_node;
5201 if (old_tree != new_tree)
5203 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5204 if (TREE_TARGET_GLOBALS (new_tree))
5205 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5206 else if (new_tree == target_option_default_node)
5207 restore_target_globals (&default_target_globals);
5208 else
5209 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5211 ix86_previous_fndecl = fndecl;
5215 /* Return true if this goes in large data/bss. */
5217 static bool
5218 ix86_in_large_data_p (tree exp)
5220 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5221 return false;
5223 /* Functions are never large data. */
5224 if (TREE_CODE (exp) == FUNCTION_DECL)
5225 return false;
5227 /* Automatic variables are never large data. */
5228 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5229 return false;
5231 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5233 const char *section = DECL_SECTION_NAME (exp);
5234 if (strcmp (section, ".ldata") == 0
5235 || strcmp (section, ".lbss") == 0)
5236 return true;
5237 return false;
5239 else
5241 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5243 /* If this is an incomplete type with size 0, then we can't put it
5244 in data because it might be too big when completed. Also,
5245 int_size_in_bytes returns -1 if size can vary or is larger than
5246 an integer, in which case it is also safer to assume that it goes in
5247 large data. */
5248 if (size <= 0 || size > ix86_section_threshold)
5249 return true;
5252 return false;
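
Illustrative aside (not part of i386.c): a sketch of the size test above, assuming -mcmodel=medium and a 64 KiB -mlarge-data-threshold (the documented default); the names and sizes are examples only:

static char big_buffer[1 << 20];    /* larger than the threshold: treated as large data */
static char small_buffer[256];      /* below the threshold: stays in ordinary .bss       */
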
5255 /* Switch to the appropriate section for output of DECL.
5256 DECL is either a `VAR_DECL' node or a constant of some sort.
5257 RELOC indicates whether forming the initial value of DECL requires
5258 link-time relocations. */
5260 ATTRIBUTE_UNUSED static section *
5261 x86_64_elf_select_section (tree decl, int reloc,
5262 unsigned HOST_WIDE_INT align)
5264 if (ix86_in_large_data_p (decl))
5266 const char *sname = NULL;
5267 unsigned int flags = SECTION_WRITE;
5268 switch (categorize_decl_for_section (decl, reloc))
5270 case SECCAT_DATA:
5271 sname = ".ldata";
5272 break;
5273 case SECCAT_DATA_REL:
5274 sname = ".ldata.rel";
5275 break;
5276 case SECCAT_DATA_REL_LOCAL:
5277 sname = ".ldata.rel.local";
5278 break;
5279 case SECCAT_DATA_REL_RO:
5280 sname = ".ldata.rel.ro";
5281 break;
5282 case SECCAT_DATA_REL_RO_LOCAL:
5283 sname = ".ldata.rel.ro.local";
5284 break;
5285 case SECCAT_BSS:
5286 sname = ".lbss";
5287 flags |= SECTION_BSS;
5288 break;
5289 case SECCAT_RODATA:
5290 case SECCAT_RODATA_MERGE_STR:
5291 case SECCAT_RODATA_MERGE_STR_INIT:
5292 case SECCAT_RODATA_MERGE_CONST:
5293 sname = ".lrodata";
5294 flags = 0;
5295 break;
5296 case SECCAT_SRODATA:
5297 case SECCAT_SDATA:
5298 case SECCAT_SBSS:
5299 gcc_unreachable ();
5300 case SECCAT_TEXT:
5301 case SECCAT_TDATA:
5302 case SECCAT_TBSS:
5303 /* We don't split these for the medium model. Place them into
5304 default sections and hope for the best. */
5305 break;
5307 if (sname)
5309 /* We might get called with string constants, but get_named_section
5310 doesn't like them as they are not DECLs. Also, we need to set
5311 flags in that case. */
5312 if (!DECL_P (decl))
5313 return get_section (sname, flags, NULL);
5314 return get_named_section (decl, sname, reloc);
5317 return default_elf_select_section (decl, reloc, align);
5320 /* Select a set of attributes for section NAME based on the properties
5321 of DECL and whether or not RELOC indicates that DECL's initializer
5322 might contain runtime relocations. */
5324 static unsigned int ATTRIBUTE_UNUSED
5325 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5327 unsigned int flags = default_section_type_flags (decl, name, reloc);
5329 if (decl == NULL_TREE
5330 && (strcmp (name, ".ldata.rel.ro") == 0
5331 || strcmp (name, ".ldata.rel.ro.local") == 0))
5332 flags |= SECTION_RELRO;
5334 if (strcmp (name, ".lbss") == 0
5335 || strncmp (name, ".lbss.", 5) == 0
5336 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5337 flags |= SECTION_BSS;
5339 return flags;
5342 /* Build up a unique section name, expressed as a
5343 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5344 RELOC indicates whether the initial value of EXP requires
5345 link-time relocations. */
5347 static void ATTRIBUTE_UNUSED
5348 x86_64_elf_unique_section (tree decl, int reloc)
5350 if (ix86_in_large_data_p (decl))
5352 const char *prefix = NULL;
5353 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5354 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5356 switch (categorize_decl_for_section (decl, reloc))
5358 case SECCAT_DATA:
5359 case SECCAT_DATA_REL:
5360 case SECCAT_DATA_REL_LOCAL:
5361 case SECCAT_DATA_REL_RO:
5362 case SECCAT_DATA_REL_RO_LOCAL:
5363 prefix = one_only ? ".ld" : ".ldata";
5364 break;
5365 case SECCAT_BSS:
5366 prefix = one_only ? ".lb" : ".lbss";
5367 break;
5368 case SECCAT_RODATA:
5369 case SECCAT_RODATA_MERGE_STR:
5370 case SECCAT_RODATA_MERGE_STR_INIT:
5371 case SECCAT_RODATA_MERGE_CONST:
5372 prefix = one_only ? ".lr" : ".lrodata";
5373 break;
5374 case SECCAT_SRODATA:
5375 case SECCAT_SDATA:
5376 case SECCAT_SBSS:
5377 gcc_unreachable ();
5378 case SECCAT_TEXT:
5379 case SECCAT_TDATA:
5380 case SECCAT_TBSS:
5381 /* We don't split these for the medium model. Place them into
5382 default sections and hope for the best. */
5383 break;
5385 if (prefix)
5387 const char *name, *linkonce;
5388 char *string;
5390 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5391 name = targetm.strip_name_encoding (name);
5393 /* If we're using one_only, then there needs to be a .gnu.linkonce
5394 prefix to the section name. */
5395 linkonce = one_only ? ".gnu.linkonce" : "";
5397 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5399 set_decl_section_name (decl, string);
5400 return;
5403 default_unique_section (decl, reloc);
5406 #ifdef COMMON_ASM_OP
5407 /* This says how to output assembler code to declare an
5408 uninitialized external linkage data object.
5410 For medium model x86-64 we need to use the .largecomm directive for
5411 large objects. */
5412 void
5413 x86_elf_aligned_common (FILE *file,
5414 const char *name, unsigned HOST_WIDE_INT size,
5415 int align)
5417 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5418 && size > (unsigned int)ix86_section_threshold)
5419 fputs ("\t.largecomm\t", file);
5420 else
5421 fputs (COMMON_ASM_OP, file);
5422 assemble_name (file, name);
5423 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5424 size, align / BITS_PER_UNIT);
5426 #endif
5428 /* Utility function for targets to use in implementing
5429 ASM_OUTPUT_ALIGNED_BSS. */
5431 void
5432 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5433 unsigned HOST_WIDE_INT size, int align)
5435 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5436 && size > (unsigned int)ix86_section_threshold)
5437 switch_to_section (get_named_section (decl, ".lbss", 0));
5438 else
5439 switch_to_section (bss_section);
5440 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5441 #ifdef ASM_DECLARE_OBJECT_NAME
5442 last_assemble_variable_decl = decl;
5443 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5444 #else
5445 /* The standard thing is just to output a label for the object. */
5446 ASM_OUTPUT_LABEL (file, name);
5447 #endif /* ASM_DECLARE_OBJECT_NAME */
5448 ASM_OUTPUT_SKIP (file, size ? size : 1);
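
Illustrative aside (not part of i386.c): a hedged sketch of the large-common path above, with a hypothetical symbol. Under -mcmodel=medium, and with the object made a common symbol (e.g. via -fcommon), a definition that crosses the threshold is expected to be announced with .largecomm rather than the usual common directive:

char huge_table[4 << 20];   /* tentative definition; roughly emitted as
                               ".largecomm huge_table,4194304,<align>"  */
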
5451 /* Decide whether we must probe the stack before any space allocation
5452 on this target. It's essentially TARGET_STACK_PROBE except when
5453 -fstack-check causes the stack to be already probed differently. */
5455 bool
5456 ix86_target_stack_probe (void)
5458 /* Do not probe the stack twice if static stack checking is enabled. */
5459 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5460 return false;
5462 return TARGET_STACK_PROBE;
5465 /* Decide whether we can make a sibling call to a function. DECL is the
5466 declaration of the function being targeted by the call and EXP is the
5467 CALL_EXPR representing the call. */
5469 static bool
5470 ix86_function_ok_for_sibcall (tree decl, tree exp)
5472 tree type, decl_or_type;
5473 rtx a, b;
5475 /* If we are generating position-independent code, we cannot sibcall
5476 optimize any indirect call, or a direct call to a global function,
5477 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5478 if (!TARGET_MACHO
5479 && !TARGET_64BIT
5480 && flag_pic
5481 && (!decl || !targetm.binds_local_p (decl)))
5482 return false;
5484 /* If we need to align the outgoing stack, then sibcalling would
5485 unalign the stack, which may break the called function. */
5486 if (ix86_minimum_incoming_stack_boundary (true)
5487 < PREFERRED_STACK_BOUNDARY)
5488 return false;
5490 if (decl)
5492 decl_or_type = decl;
5493 type = TREE_TYPE (decl);
5495 else
5497 /* We're looking at the CALL_EXPR, we need the type of the function. */
5498 type = CALL_EXPR_FN (exp); /* pointer expression */
5499 type = TREE_TYPE (type); /* pointer type */
5500 type = TREE_TYPE (type); /* function type */
5501 decl_or_type = type;
5504 /* Check that the return value locations are the same. Like
5505 if we are returning floats on the 80387 register stack, we cannot
5506 make a sibcall from a function that doesn't return a float to a
5507 function that does or, conversely, from a function that does return
5508 a float to a function that doesn't; the necessary stack adjustment
5509 would not be executed. This is also the place we notice
5510 differences in the return value ABI. Note that it is ok for one
5511 of the functions to have void return type as long as the return
5512 value of the other is passed in a register. */
5513 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5514 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5515 cfun->decl, false);
5516 if (STACK_REG_P (a) || STACK_REG_P (b))
5518 if (!rtx_equal_p (a, b))
5519 return false;
5521 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5523 else if (!rtx_equal_p (a, b))
5524 return false;
5526 if (TARGET_64BIT)
5528 /* The SYSV ABI has more call-clobbered registers;
5529 disallow sibcalls from MS to SYSV. */
5530 if (cfun->machine->call_abi == MS_ABI
5531 && ix86_function_type_abi (type) == SYSV_ABI)
5532 return false;
5534 else
5536 /* If this call is indirect, we'll need to be able to use a
5537 call-clobbered register for the address of the target function.
5538 Make sure that all such registers are not used for passing
5539 parameters. Note that DLLIMPORT functions are indirect. */
5540 if (!decl
5541 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5543 if (ix86_function_regparm (type, NULL) >= 3)
5545 /* ??? Need to count the actual number of registers to be used,
5546 not the possible number of registers. Fix later. */
5547 return false;
5552 /* Otherwise okay. That also includes certain types of indirect calls. */
5553 return true;
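
Illustrative aside (not part of i386.c): a sketch of the PIC restriction above, with hypothetical helpers. With -m32 -fPIC, the tail call to the global function is not turned into a sibcall because the PLT needs %ebx live; the locally bound helper remains eligible:

extern int global_helper (int);

static int
local_helper (int x)
{
  return x + 1;
}

int
wrap_global (int x)
{
  return global_helper (x);   /* not a sibcall under 32-bit PIC */
}

int
wrap_local (int x)
{
  return local_helper (x);    /* sibcall is still possible */
}
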
5556 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5557 and "sseregparm" calling convention attributes;
5558 arguments as in struct attribute_spec.handler. */
5560 static tree
5561 ix86_handle_cconv_attribute (tree *node, tree name,
5562 tree args,
5563 int,
5564 bool *no_add_attrs)
5566 if (TREE_CODE (*node) != FUNCTION_TYPE
5567 && TREE_CODE (*node) != METHOD_TYPE
5568 && TREE_CODE (*node) != FIELD_DECL
5569 && TREE_CODE (*node) != TYPE_DECL)
5571 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5572 name);
5573 *no_add_attrs = true;
5574 return NULL_TREE;
5577 /* Can combine regparm with all attributes but fastcall and thiscall. */
5578 if (is_attribute_p ("regparm", name))
5580 tree cst;
5582 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5584 error ("fastcall and regparm attributes are not compatible");
5587 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5589 error ("regparam and thiscall attributes are not compatible");
5592 cst = TREE_VALUE (args);
5593 if (TREE_CODE (cst) != INTEGER_CST)
5595 warning (OPT_Wattributes,
5596 "%qE attribute requires an integer constant argument",
5597 name);
5598 *no_add_attrs = true;
5600 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5602 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5603 name, REGPARM_MAX);
5604 *no_add_attrs = true;
5607 return NULL_TREE;
5610 if (TARGET_64BIT)
5612 /* Do not warn when emulating the MS ABI. */
5613 if ((TREE_CODE (*node) != FUNCTION_TYPE
5614 && TREE_CODE (*node) != METHOD_TYPE)
5615 || ix86_function_type_abi (*node) != MS_ABI)
5616 warning (OPT_Wattributes, "%qE attribute ignored",
5617 name);
5618 *no_add_attrs = true;
5619 return NULL_TREE;
5622 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5623 if (is_attribute_p ("fastcall", name))
5625 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5627 error ("fastcall and cdecl attributes are not compatible");
5629 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5631 error ("fastcall and stdcall attributes are not compatible");
5633 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5635 error ("fastcall and regparm attributes are not compatible");
5637 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5639 error ("fastcall and thiscall attributes are not compatible");
5643 /* Can combine stdcall with fastcall (redundant), regparm and
5644 sseregparm. */
5645 else if (is_attribute_p ("stdcall", name))
5647 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5649 error ("stdcall and cdecl attributes are not compatible");
5651 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5653 error ("stdcall and fastcall attributes are not compatible");
5655 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5657 error ("stdcall and thiscall attributes are not compatible");
5661 /* Can combine cdecl with regparm and sseregparm. */
5662 else if (is_attribute_p ("cdecl", name))
5664 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5666 error ("stdcall and cdecl attributes are not compatible");
5668 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5670 error ("fastcall and cdecl attributes are not compatible");
5672 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5674 error ("cdecl and thiscall attributes are not compatible");
5677 else if (is_attribute_p ("thiscall", name))
5679 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5680 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5681 name);
5682 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5684 error ("stdcall and thiscall attributes are not compatible");
5686 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5688 error ("fastcall and thiscall attributes are not compatible");
5690 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5692 error ("cdecl and thiscall attributes are not compatible");
5696 /* Can combine sseregparm with all attributes. */
5698 return NULL_TREE;
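
Illustrative aside (not part of i386.c): declarations exercising the handler above on a 32-bit target; the names are hypothetical. The first two are accepted, the third reproduces the fastcall/regparm diagnostic:

int __attribute__((stdcall))     f_stdcall (int a, int b);
int __attribute__((regparm (2))) f_regparm (int a, int b);
#if 0
/* error: fastcall and regparm attributes are not compatible */
int __attribute__((fastcall, regparm (2))) f_bad (int a, int b);
#endif
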
5701 /* The transactional memory builtins are implicitly regparm or fastcall
5702 depending on the ABI. Override the generic do-nothing attribute that
5703 these builtins were declared with, and replace it with one of the two
5704 attributes that we expect elsewhere. */
5706 static tree
5707 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5708 int flags, bool *no_add_attrs)
5710 tree alt;
5712 /* In no case do we want to add the placeholder attribute. */
5713 *no_add_attrs = true;
5715 /* The 64-bit ABI is unchanged for transactional memory. */
5716 if (TARGET_64BIT)
5717 return NULL_TREE;
5719 /* ??? Is there a better way to validate 32-bit windows? We have
5720 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5721 if (CHECK_STACK_LIMIT > 0)
5722 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5723 else
5725 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5726 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5728 decl_attributes (node, alt, flags);
5730 return NULL_TREE;
5733 /* This function determines from TYPE the calling-convention. */
5735 unsigned int
5736 ix86_get_callcvt (const_tree type)
5738 unsigned int ret = 0;
5739 bool is_stdarg;
5740 tree attrs;
5742 if (TARGET_64BIT)
5743 return IX86_CALLCVT_CDECL;
5745 attrs = TYPE_ATTRIBUTES (type);
5746 if (attrs != NULL_TREE)
5748 if (lookup_attribute ("cdecl", attrs))
5749 ret |= IX86_CALLCVT_CDECL;
5750 else if (lookup_attribute ("stdcall", attrs))
5751 ret |= IX86_CALLCVT_STDCALL;
5752 else if (lookup_attribute ("fastcall", attrs))
5753 ret |= IX86_CALLCVT_FASTCALL;
5754 else if (lookup_attribute ("thiscall", attrs))
5755 ret |= IX86_CALLCVT_THISCALL;
5757 /* Regparm isn't allowed for thiscall and fastcall. */
5758 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5760 if (lookup_attribute ("regparm", attrs))
5761 ret |= IX86_CALLCVT_REGPARM;
5762 if (lookup_attribute ("sseregparm", attrs))
5763 ret |= IX86_CALLCVT_SSEREGPARM;
5766 if (IX86_BASE_CALLCVT(ret) != 0)
5767 return ret;
5770 is_stdarg = stdarg_p (type);
5771 if (TARGET_RTD && !is_stdarg)
5772 return IX86_CALLCVT_STDCALL | ret;
5774 if (ret != 0
5775 || is_stdarg
5776 || TREE_CODE (type) != METHOD_TYPE
5777 || ix86_function_type_abi (type) != MS_ABI)
5778 return IX86_CALLCVT_CDECL | ret;
5780 return IX86_CALLCVT_THISCALL;
5783 /* Return 0 if the attributes for two types are incompatible, 1 if they
5784 are compatible, and 2 if they are nearly compatible (which causes a
5785 warning to be generated). */
5787 static int
5788 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5790 unsigned int ccvt1, ccvt2;
5792 if (TREE_CODE (type1) != FUNCTION_TYPE
5793 && TREE_CODE (type1) != METHOD_TYPE)
5794 return 1;
5796 ccvt1 = ix86_get_callcvt (type1);
5797 ccvt2 = ix86_get_callcvt (type2);
5798 if (ccvt1 != ccvt2)
5799 return 0;
5800 if (ix86_function_regparm (type1, NULL)
5801 != ix86_function_regparm (type2, NULL))
5802 return 0;
5804 return 1;
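
Illustrative aside (not part of i386.c): on a 32-bit target the comparison above makes the two pointer types below differ in calling convention, so the assignment is expected to draw an incompatible-pointer diagnostic; the names are hypothetical:

typedef int (*plain_fn) (int);          /* default (cdecl) convention       */
int __attribute__((stdcall)) g (int);   /* callee-pops (stdcall) convention */

plain_fn p = g;                          /* conventions differ -> diagnostic */
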
5807 /* Return the regparm value for a function with the indicated TYPE and DECL.
5808 DECL may be NULL when calling function indirectly
5809 or considering a libcall. */
5811 static int
5812 ix86_function_regparm (const_tree type, const_tree decl)
5814 tree attr;
5815 int regparm;
5816 unsigned int ccvt;
5818 if (TARGET_64BIT)
5819 return (ix86_function_type_abi (type) == SYSV_ABI
5820 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5821 ccvt = ix86_get_callcvt (type);
5822 regparm = ix86_regparm;
5824 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5826 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5827 if (attr)
5829 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5830 return regparm;
5833 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5834 return 2;
5835 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5836 return 1;
5838 /* Use register calling convention for local functions when possible. */
5839 if (decl
5840 && TREE_CODE (decl) == FUNCTION_DECL)
5842 cgraph_node *target = cgraph_node::get (decl);
5843 if (target)
5844 target = target->function_symbol ();
5846 /* Caller and callee must agree on the calling convention, so checking
5847 just the global optimize flag here would mean that with
5848 __attribute__((optimize (...))) the caller could use the regparm convention
5849 and the callee not, or vice versa. Instead look at whether the callee
5850 itself is optimized. */
5851 if (target && opt_for_fn (target->decl, optimize)
5852 && !(profile_flag && !flag_fentry))
5854 cgraph_local_info *i = &target->local;
5855 if (i && i->local && i->can_change_signature)
5857 int local_regparm, globals = 0, regno;
5859 /* Make sure no regparm register is taken by a
5860 fixed register variable. */
5861 for (local_regparm = 0; local_regparm < REGPARM_MAX;
5862 local_regparm++)
5863 if (fixed_regs[local_regparm])
5864 break;
5866 /* We don't want to use regparm(3) for nested functions as
5867 these use a static chain pointer in the third argument. */
5868 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
5869 local_regparm = 2;
5871 /* Save a register for the split stack. */
5872 if (local_regparm == 3 && flag_split_stack)
5873 local_regparm = 2;
5875 /* Each fixed register usage increases register pressure,
5876 so fewer registers should be used for argument passing.
5877 This functionality can be overridden by an explicit
5878 regparm value. */
5879 for (regno = AX_REG; regno <= DI_REG; regno++)
5880 if (fixed_regs[regno])
5881 globals++;
5883 local_regparm
5884 = globals < local_regparm ? local_regparm - globals : 0;
5886 if (local_regparm > regparm)
5887 regparm = local_regparm;
5892 return regparm;
5895 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5896 DFmode (2) arguments in SSE registers for a function with the
5897 indicated TYPE and DECL. DECL may be NULL when calling function
5898 indirectly or considering a libcall. Otherwise return 0. */
5900 static int
5901 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5903 gcc_assert (!TARGET_64BIT);
5905 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5906 by the sseregparm attribute. */
5907 if (TARGET_SSEREGPARM
5908 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5910 if (!TARGET_SSE)
5912 if (warn)
5914 if (decl)
5915 error ("calling %qD with attribute sseregparm without "
5916 "SSE/SSE2 enabled", decl);
5917 else
5918 error ("calling %qT with attribute sseregparm without "
5919 "SSE/SSE2 enabled", type);
5921 return 0;
5924 return 2;
5927 if (!decl)
5928 return 0;
5930 cgraph_node *target = cgraph_node::get (decl);
5931 if (target)
5932 target = target->function_symbol ();
5934 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5935 (and DFmode for SSE2) arguments in SSE registers. */
5936 if (target
5937 /* TARGET_SSE_MATH */
5938 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
5939 && opt_for_fn (target->decl, optimize)
5940 && !(profile_flag && !flag_fentry))
5942 cgraph_local_info *i = &target->local;
5943 if (i && i->local && i->can_change_signature)
5945 /* Refuse to produce wrong code when a local function with SSE enabled
5946 is called from an SSE-disabled function.
5947 We could work hard to handle these scenarios, but hopefully
5948 it does not matter in practice. */
5949 if (!TARGET_SSE && warn)
5951 error ("calling %qD with SSE caling convention without "
5952 "SSE/SSE2 enabled", decl);
5953 return 0;
5955 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
5956 ->x_ix86_isa_flags) ? 2 : 1;
5960 return 0;
5963 /* Return true if EAX is live at the start of the function. Used by
5964 ix86_expand_prologue to determine if we need special help before
5965 calling allocate_stack_worker. */
5967 static bool
5968 ix86_eax_live_at_start_p (void)
5970 /* Cheat. Don't bother working forward from ix86_function_regparm
5971 to the function type to whether an actual argument is located in
5972 eax. Instead just look at cfg info, which is still close enough
5973 to correct at this point. This gives false positives for broken
5974 functions that might use uninitialized data that happens to be
5975 allocated in eax, but who cares? */
5976 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5979 static bool
5980 ix86_keep_aggregate_return_pointer (tree fntype)
5982 tree attr;
5984 if (!TARGET_64BIT)
5986 attr = lookup_attribute ("callee_pop_aggregate_return",
5987 TYPE_ATTRIBUTES (fntype));
5988 if (attr)
5989 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5991 /* For 32-bit MS-ABI the default is to keep aggregate
5992 return pointer. */
5993 if (ix86_function_type_abi (fntype) == MS_ABI)
5994 return true;
5996 return KEEP_AGGREGATE_RETURN_POINTER != 0;
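
Illustrative aside (not part of i386.c): a hedged sketch of the attribute tested above, 32-bit only; the struct and functions are hypothetical. An argument of 1 asks the callee to pop the hidden return-slot pointer, 0 leaves it on the stack for the caller:

struct big { int v[8]; };

struct big __attribute__((callee_pop_aggregate_return (1))) make_big_pop (void);
struct big __attribute__((callee_pop_aggregate_return (0))) make_big_keep (void);
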
5999 /* Value is the number of bytes of arguments automatically
6000 popped when returning from a subroutine call.
6001 FUNDECL is the declaration node of the function (as a tree),
6002 FUNTYPE is the data type of the function (as a tree),
6003 or for a library call it is an identifier node for the subroutine name.
6004 SIZE is the number of bytes of arguments passed on the stack.
6006 On the 80386, the RTD insn may be used to pop them if the number
6007 of args is fixed, but if the number is variable then the caller
6008 must pop them all. RTD can't be used for library calls now
6009 because the library is compiled with the Unix compiler.
6010 Use of RTD is a selectable option, since it is incompatible with
6011 standard Unix calling sequences. If the option is not selected,
6012 the caller must always pop the args.
6014 The attribute stdcall is equivalent to RTD on a per module basis. */
6016 static int
6017 ix86_return_pops_args (tree fundecl, tree funtype, int size)
6019 unsigned int ccvt;
6021 /* None of the 64-bit ABIs pop arguments. */
6022 if (TARGET_64BIT)
6023 return 0;
6025 ccvt = ix86_get_callcvt (funtype);
6027 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6028 | IX86_CALLCVT_THISCALL)) != 0
6029 && ! stdarg_p (funtype))
6030 return size;
6032 /* Lose any fake structure return argument if it is passed on the stack. */
6033 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6034 && !ix86_keep_aggregate_return_pointer (funtype))
6036 int nregs = ix86_function_regparm (funtype, fundecl);
6037 if (nregs == 0)
6038 return GET_MODE_SIZE (Pmode);
6041 return 0;
6044 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
6046 static bool
6047 ix86_legitimate_combined_insn (rtx_insn *insn)
6049 /* Check operand constraints in case hard registers were propagated
6050 into insn pattern. This check prevents combine pass from
6051 generating insn patterns with invalid hard register operands.
6052 These invalid insns can eventually confuse reload to error out
6053 with a spill failure. See also PRs 46829 and 46843. */
6054 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
6056 int i;
6058 extract_insn (insn);
6059 preprocess_constraints (insn);
6061 int n_operands = recog_data.n_operands;
6062 int n_alternatives = recog_data.n_alternatives;
6063 for (i = 0; i < n_operands; i++)
6065 rtx op = recog_data.operand[i];
6066 machine_mode mode = GET_MODE (op);
6067 const operand_alternative *op_alt;
6068 int offset = 0;
6069 bool win;
6070 int j;
6072 /* For pre-AVX disallow unaligned loads/stores where the
6073 instructions don't support it. */
6074 if (!TARGET_AVX
6075 && VECTOR_MODE_P (GET_MODE (op))
6076 && misaligned_operand (op, GET_MODE (op)))
6078 int min_align = get_attr_ssememalign (insn);
6079 if (min_align == 0)
6080 return false;
6083 /* A unary operator may be accepted by the predicate, but it
6084 is irrelevant for matching constraints. */
6085 if (UNARY_P (op))
6086 op = XEXP (op, 0);
6088 if (GET_CODE (op) == SUBREG)
6090 if (REG_P (SUBREG_REG (op))
6091 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6092 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6093 GET_MODE (SUBREG_REG (op)),
6094 SUBREG_BYTE (op),
6095 GET_MODE (op));
6096 op = SUBREG_REG (op);
6099 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6100 continue;
6102 op_alt = recog_op_alt;
6104 /* Operand has no constraints, anything is OK. */
6105 win = !n_alternatives;
6107 alternative_mask preferred = get_preferred_alternatives (insn);
6108 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6110 if (!TEST_BIT (preferred, j))
6111 continue;
6112 if (op_alt[i].anything_ok
6113 || (op_alt[i].matches != -1
6114 && operands_match_p
6115 (recog_data.operand[i],
6116 recog_data.operand[op_alt[i].matches]))
6117 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6119 win = true;
6120 break;
6124 if (!win)
6125 return false;
6129 return true;
6132 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
6134 static unsigned HOST_WIDE_INT
6135 ix86_asan_shadow_offset (void)
6137 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6138 : HOST_WIDE_INT_C (0x7fff8000))
6139 : (HOST_WIDE_INT_1 << 29);
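
Illustrative aside (not part of i386.c): AddressSanitizer maps 8 application bytes to one shadow byte, so the offsets returned above are combined with a 3-bit shift; a minimal sketch of that arithmetic, with a hypothetical helper name:

#include <stdint.h>

static inline uintptr_t
asan_shadow_addr (uintptr_t app_addr, uintptr_t shadow_offset)
{
  /* e.g. shadow_offset == 0x7fff8000 for 64-bit Linux, 1 << 29 for 32-bit */
  return (app_addr >> 3) + shadow_offset;
}
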
6142 /* Argument support functions. */
6144 /* Return true when register may be used to pass function parameters. */
6145 bool
6146 ix86_function_arg_regno_p (int regno)
6148 int i;
6149 const int *parm_regs;
6151 if (TARGET_MPX && BND_REGNO_P (regno))
6152 return true;
6154 if (!TARGET_64BIT)
6156 if (TARGET_MACHO)
6157 return (regno < REGPARM_MAX
6158 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6159 else
6160 return (regno < REGPARM_MAX
6161 || (TARGET_MMX && MMX_REGNO_P (regno)
6162 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6163 || (TARGET_SSE && SSE_REGNO_P (regno)
6164 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6167 if (TARGET_SSE && SSE_REGNO_P (regno)
6168 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6169 return true;
6171 /* TODO: The function should depend on current function ABI but
6172 builtins.c would need updating then. Therefore we use the
6173 default ABI. */
6175 /* RAX is used as hidden argument to va_arg functions. */
6176 if (ix86_abi == SYSV_ABI && regno == AX_REG)
6177 return true;
6179 if (ix86_abi == MS_ABI)
6180 parm_regs = x86_64_ms_abi_int_parameter_registers;
6181 else
6182 parm_regs = x86_64_int_parameter_registers;
6183 for (i = 0; i < (ix86_abi == MS_ABI
6184 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6185 if (regno == parm_regs[i])
6186 return true;
6187 return false;
6190 /* Return if we do not know how to pass TYPE solely in registers. */
6192 static bool
6193 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6195 if (must_pass_in_stack_var_size_or_pad (mode, type))
6196 return true;
6198 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6199 The layout_type routine is crafty and tries to trick us into passing
6200 currently unsupported vector types on the stack by using TImode. */
6201 return (!TARGET_64BIT && mode == TImode
6202 && type && TREE_CODE (type) != VECTOR_TYPE);
6205 /* Return the size, in bytes, of the area reserved for arguments passed
6206 in registers for the function represented by FNDECL, depending on the
6207 ABI format used. */
6208 int
6209 ix86_reg_parm_stack_space (const_tree fndecl)
6211 enum calling_abi call_abi = SYSV_ABI;
6212 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6213 call_abi = ix86_function_abi (fndecl);
6214 else
6215 call_abi = ix86_function_type_abi (fndecl);
6216 if (TARGET_64BIT && call_abi == MS_ABI)
6217 return 32;
6218 return 0;
6221 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
6222 call ABI used. */
6223 enum calling_abi
6224 ix86_function_type_abi (const_tree fntype)
6226 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6228 enum calling_abi abi = ix86_abi;
6229 if (abi == SYSV_ABI)
6231 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6233 if (TARGET_X32)
6235 static bool warned = false;
6236 if (!warned)
6238 error ("X32 does not support ms_abi attribute");
6239 warned = true;
6242 abi = MS_ABI;
6245 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6246 abi = SYSV_ABI;
6247 return abi;
6249 return ix86_abi;
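
Illustrative aside (not part of i386.c): the per-function ABI attributes resolved above; the declarations are hypothetical. On a 64-bit SYSV target the first switches a function to the Microsoft convention, and on an MS target the second switches back:

long __attribute__((ms_abi))   win64_style (long a, long b, long c, long d);
long __attribute__((sysv_abi)) sysv_style  (long a, long b, long c, long d);
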
6252 /* We add this as a workaround in order to use libc_has_function
6253 hook in i386.md. */
6254 bool
6255 ix86_libc_has_function (enum function_class fn_class)
6257 return targetm.libc_has_function (fn_class);
6260 static bool
6261 ix86_function_ms_hook_prologue (const_tree fn)
6263 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6265 if (decl_function_context (fn) != NULL_TREE)
6266 error_at (DECL_SOURCE_LOCATION (fn),
6267 "ms_hook_prologue is not compatible with nested function");
6268 else
6269 return true;
6271 return false;
6274 static enum calling_abi
6275 ix86_function_abi (const_tree fndecl)
6277 if (! fndecl)
6278 return ix86_abi;
6279 return ix86_function_type_abi (TREE_TYPE (fndecl));
6282 /* Return SYSV_ABI or MS_ABI, depending on CFUN, specifying the
6283 call ABI used. */
6284 enum calling_abi
6285 ix86_cfun_abi (void)
6287 if (! cfun)
6288 return ix86_abi;
6289 return cfun->machine->call_abi;
6292 /* Write the extra assembler code needed to declare a function properly. */
6294 void
6295 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6296 tree decl)
6298 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6300 if (is_ms_hook)
6302 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6303 unsigned int filler_cc = 0xcccccccc;
6305 for (i = 0; i < filler_count; i += 4)
6306 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6309 #ifdef SUBTARGET_ASM_UNWIND_INIT
6310 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6311 #endif
6313 ASM_OUTPUT_LABEL (asm_out_file, fname);
6315 /* Output magic byte marker, if hot-patch attribute is set. */
6316 if (is_ms_hook)
6318 if (TARGET_64BIT)
6320 /* leaq [%rsp + 0], %rsp */
6321 asm_fprintf (asm_out_file, ASM_BYTE
6322 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6324 else
6326 /* movl.s %edi, %edi
6327 push %ebp
6328 movl.s %esp, %ebp */
6329 asm_fprintf (asm_out_file, ASM_BYTE
6330 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6335 /* regclass.c */
6336 extern void init_regs (void);
6338 /* Implementation of the call ABI switching target hook. For FNDECL,
6339 the corresponding call register sets are selected. See also
6340 ix86_conditional_register_usage for more details. */
6341 void
6342 ix86_call_abi_override (const_tree fndecl)
6344 if (fndecl == NULL_TREE)
6345 cfun->machine->call_abi = ix86_abi;
6346 else
6347 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6350 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6351 expensive re-initialization of init_regs each time we switch function context
6352 since this is needed only during RTL expansion. */
6353 static void
6354 ix86_maybe_switch_abi (void)
6356 if (TARGET_64BIT &&
6357 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6358 reinit_regs ();
6361 /* Return 1 if pseudo register should be created and used to hold
6362 GOT address for PIC code. */
6363 bool
6364 ix86_use_pseudo_pic_reg (void)
6366 if ((TARGET_64BIT
6367 && (ix86_cmodel == CM_SMALL_PIC
6368 || TARGET_PECOFF))
6369 || !flag_pic)
6370 return false;
6371 return true;
6374 /* Initialize large model PIC register. */
6376 static void
6377 ix86_init_large_pic_reg (unsigned int tmp_regno)
6379 rtx_code_label *label;
6380 rtx tmp_reg;
6382 gcc_assert (Pmode == DImode);
6383 label = gen_label_rtx ();
6384 emit_label (label);
6385 LABEL_PRESERVE_P (label) = 1;
6386 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6387 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6388 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6389 label));
6390 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6391 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6392 pic_offset_table_rtx, tmp_reg));
6395 /* Create and initialize PIC register if required. */
6396 static void
6397 ix86_init_pic_reg (void)
6399 edge entry_edge;
6400 rtx_insn *seq;
6402 if (!ix86_use_pseudo_pic_reg ())
6403 return;
6405 start_sequence ();
6407 if (TARGET_64BIT)
6409 if (ix86_cmodel == CM_LARGE_PIC)
6410 ix86_init_large_pic_reg (R11_REG);
6411 else
6412 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6414 else
6416 /* If there is a future mcount call in the function, it is more profitable
6417 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
6418 rtx reg = crtl->profile
6419 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6420 : pic_offset_table_rtx;
6421 rtx_insn *insn = emit_insn (gen_set_got (reg));
6422 RTX_FRAME_RELATED_P (insn) = 1;
6423 if (crtl->profile)
6424 emit_move_insn (pic_offset_table_rtx, reg);
6425 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6428 seq = get_insns ();
6429 end_sequence ();
6431 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6432 insert_insn_on_edge (seq, entry_edge);
6433 commit_one_edge_insertion (entry_edge);
6436 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6437 for a call to a function whose data type is FNTYPE.
6438 For a library call, FNTYPE is 0. */
6440 void
6441 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6442 tree fntype, /* tree ptr for function decl */
6443 rtx libname, /* SYMBOL_REF of library name or 0 */
6444 tree fndecl,
6445 int caller)
6447 struct cgraph_local_info *i = NULL;
6448 struct cgraph_node *target = NULL;
6450 memset (cum, 0, sizeof (*cum));
6452 if (fndecl)
6454 target = cgraph_node::get (fndecl);
6455 if (target)
6457 target = target->function_symbol ();
6458 i = cgraph_node::local_info (target->decl);
6459 cum->call_abi = ix86_function_abi (target->decl);
6461 else
6462 cum->call_abi = ix86_function_abi (fndecl);
6464 else
6465 cum->call_abi = ix86_function_type_abi (fntype);
6467 cum->caller = caller;
6469 /* Set up the number of registers to use for passing arguments. */
6470 cum->nregs = ix86_regparm;
6471 if (TARGET_64BIT)
6473 cum->nregs = (cum->call_abi == SYSV_ABI
6474 ? X86_64_REGPARM_MAX
6475 : X86_64_MS_REGPARM_MAX);
6477 if (TARGET_SSE)
6479 cum->sse_nregs = SSE_REGPARM_MAX;
6480 if (TARGET_64BIT)
6482 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6483 ? X86_64_SSE_REGPARM_MAX
6484 : X86_64_MS_SSE_REGPARM_MAX);
6487 if (TARGET_MMX)
6488 cum->mmx_nregs = MMX_REGPARM_MAX;
6489 cum->warn_avx512f = true;
6490 cum->warn_avx = true;
6491 cum->warn_sse = true;
6492 cum->warn_mmx = true;
6494 /* Because the type might mismatch between caller and callee, we need to
6495 use the actual type of the function for local calls.
6496 FIXME: cgraph_analyze can be told to actually record if a function uses
6497 va_start, so for local functions maybe_vaarg can be made aggressive,
6498 helping K&R code.
6499 FIXME: once the typesystem is fixed, we won't need this code anymore. */
6500 if (i && i->local && i->can_change_signature)
6501 fntype = TREE_TYPE (target->decl);
6502 cum->stdarg = stdarg_p (fntype);
6503 cum->maybe_vaarg = (fntype
6504 ? (!prototype_p (fntype) || stdarg_p (fntype))
6505 : !libname);
6507 cum->bnd_regno = FIRST_BND_REG;
6508 cum->bnds_in_bt = 0;
6509 cum->force_bnd_pass = 0;
6511 if (!TARGET_64BIT)
6513 /* If there are variable arguments, then we won't pass anything
6514 in registers in 32-bit mode. */
6515 if (stdarg_p (fntype))
6517 cum->nregs = 0;
6518 cum->sse_nregs = 0;
6519 cum->mmx_nregs = 0;
6520 cum->warn_avx512f = false;
6521 cum->warn_avx = false;
6522 cum->warn_sse = false;
6523 cum->warn_mmx = false;
6524 return;
6527 /* Use the ecx and edx registers if the function has the fastcall attribute,
6528 else look for regparm information. */
6529 if (fntype)
6531 unsigned int ccvt = ix86_get_callcvt (fntype);
6532 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6534 cum->nregs = 1;
6535 cum->fastcall = 1; /* Same first register as in fastcall. */
6537 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6539 cum->nregs = 2;
6540 cum->fastcall = 1;
6542 else
6543 cum->nregs = ix86_function_regparm (fntype, fndecl);
6546 /* Set up the number of SSE registers used for passing SFmode
6547 and DFmode arguments. Warn for mismatching ABI. */
6548 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
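/* For illustration, given the logic above (assuming no other attributes
   are involved):

     // 32-bit: __attribute__((fastcall)) int f (int a, int b, int c);
     //   cum->nregs == 2, cum->fastcall == 1: a in %ecx, b in %edx, c on the stack.
     // 64-bit SysV: int g (int a, double d);
     //   cum->nregs == X86_64_REGPARM_MAX (6), cum->sse_nregs == X86_64_SSE_REGPARM_MAX (8).  */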
6552 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6553 But in the case of vector types, it is some vector mode.
6555 When we have only some of our vector isa extensions enabled, then there
6556 are some modes for which vector_mode_supported_p is false. For these
6557 modes, the generic vector support in gcc will choose some non-vector mode
6558 in order to implement the type. By computing the natural mode, we'll
6559 select the proper ABI location for the operand and not depend on whatever
6560 the middle-end decides to do with these vector types.
6562 The middle-end can't deal with vector types larger than 16 bytes. In this
6563 case, we return the original mode and warn about the ABI change if CUM isn't
6564 NULL.
6566 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6567 available for the function return value. */
6569 static machine_mode
6570 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6571 bool in_return)
6573 machine_mode mode = TYPE_MODE (type);
6575 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6577 HOST_WIDE_INT size = int_size_in_bytes (type);
6578 if ((size == 8 || size == 16 || size == 32 || size == 64)
6579 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6580 && TYPE_VECTOR_SUBPARTS (type) > 1)
6582 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6584 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6585 mode = MIN_MODE_VECTOR_FLOAT;
6586 else
6587 mode = MIN_MODE_VECTOR_INT;
6589 /* Get the mode which has this inner mode and number of units. */
6590 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6591 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6592 && GET_MODE_INNER (mode) == innermode)
6594 if (size == 64 && !TARGET_AVX512F)
6596 static bool warnedavx512f;
6597 static bool warnedavx512f_ret;
6599 if (cum && cum->warn_avx512f && !warnedavx512f)
6601 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6602 "without AVX512F enabled changes the ABI"))
6603 warnedavx512f = true;
6605 else if (in_return && !warnedavx512f_ret)
6607 if (warning (OPT_Wpsabi, "AVX512F vector return "
6608 "without AVX512F enabled changes the ABI"))
6609 warnedavx512f_ret = true;
6612 return TYPE_MODE (type);
6614 else if (size == 32 && !TARGET_AVX)
6616 static bool warnedavx;
6617 static bool warnedavx_ret;
6619 if (cum && cum->warn_avx && !warnedavx)
6621 if (warning (OPT_Wpsabi, "AVX vector argument "
6622 "without AVX enabled changes the ABI"))
6623 warnedavx = true;
6625 else if (in_return && !warnedavx_ret)
6627 if (warning (OPT_Wpsabi, "AVX vector return "
6628 "without AVX enabled changes the ABI"))
6629 warnedavx_ret = true;
6632 return TYPE_MODE (type);
6634 else if (((size == 8 && TARGET_64BIT) || size == 16)
6635 && !TARGET_SSE)
6637 static bool warnedsse;
6638 static bool warnedsse_ret;
6640 if (cum && cum->warn_sse && !warnedsse)
6642 if (warning (OPT_Wpsabi, "SSE vector argument "
6643 "without SSE enabled changes the ABI"))
6644 warnedsse = true;
6646 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6648 if (warning (OPT_Wpsabi, "SSE vector return "
6649 "without SSE enabled changes the ABI"))
6650 warnedsse_ret = true;
6653 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6655 static bool warnedmmx;
6656 static bool warnedmmx_ret;
6658 if (cum && cum->warn_mmx && !warnedmmx)
6660 if (warning (OPT_Wpsabi, "MMX vector argument "
6661 "without MMX enabled changes the ABI"))
6662 warnedmmx = true;
6664 else if (in_return && !warnedmmx_ret)
6666 if (warning (OPT_Wpsabi, "MMX vector return "
6667 "without MMX enabled changes the ABI"))
6668 warnedmmx_ret = true;
6671 return mode;
6674 gcc_unreachable ();
6678 return mode;
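/* For example, assuming a generic vector typedef

     typedef int v8si __attribute__ ((vector_size (32)));

   the natural mode of v8si is V8SImode when AVX is enabled; without AVX
   the 32-byte case above keeps TYPE_MODE (type) and emits the -Wpsabi
   "AVX vector argument ... changes the ABI" warning once.  */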
6681 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6682 this may not agree with the mode that the type system has chosen for the
6683 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6684 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6686 static rtx
6687 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6688 unsigned int regno)
6690 rtx tmp;
6692 if (orig_mode != BLKmode)
6693 tmp = gen_rtx_REG (orig_mode, regno);
6694 else
6696 tmp = gen_rtx_REG (mode, regno);
6697 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6698 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6701 return tmp;
6704 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6705 of this code is to classify each 8bytes of incoming argument by the register
6706 class and assign registers accordingly. */
6708 /* Return the union class of CLASS1 and CLASS2.
6709 See the x86-64 PS ABI for details. */
6711 static enum x86_64_reg_class
6712 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6714 /* Rule #1: If both classes are equal, this is the resulting class. */
6715 if (class1 == class2)
6716 return class1;
6718 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6719 the other class. */
6720 if (class1 == X86_64_NO_CLASS)
6721 return class2;
6722 if (class2 == X86_64_NO_CLASS)
6723 return class1;
6725 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6726 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6727 return X86_64_MEMORY_CLASS;
6729 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6730 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6731 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6732 return X86_64_INTEGERSI_CLASS;
6733 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6734 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6735 return X86_64_INTEGER_CLASS;
6737 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6738 MEMORY is used. */
6739 if (class1 == X86_64_X87_CLASS
6740 || class1 == X86_64_X87UP_CLASS
6741 || class1 == X86_64_COMPLEX_X87_CLASS
6742 || class2 == X86_64_X87_CLASS
6743 || class2 == X86_64_X87UP_CLASS
6744 || class2 == X86_64_COMPLEX_X87_CLASS)
6745 return X86_64_MEMORY_CLASS;
6747 /* Rule #6: Otherwise class SSE is used. */
6748 return X86_64_SSE_CLASS;
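/* A few consequences of the rules above:

     merge_classes (X86_64_NO_CLASS, X86_64_SSESF_CLASS)         -> X86_64_SSESF_CLASS
     merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS)  -> X86_64_INTEGERSI_CLASS
     merge_classes (X86_64_SSE_CLASS, X86_64_INTEGER_CLASS)      -> X86_64_INTEGER_CLASS
     merge_classes (X86_64_SSE_CLASS, X86_64_X87_CLASS)          -> X86_64_MEMORY_CLASS  */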
6751 /* Classify the argument of type TYPE and mode MODE.
6752 CLASSES will be filled by the register class used to pass each word
6753 of the operand. The number of words is returned. In case the parameter
6754 should be passed in memory, 0 is returned. As a special case for zero
6755 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6757 BIT_OFFSET is used internally for handling records and specifies the
6758 offset in bits modulo 512, to avoid overflow cases.
6760 See the x86-64 PS ABI for details.
6763 static int
6764 classify_argument (machine_mode mode, const_tree type,
6765 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6767 HOST_WIDE_INT bytes =
6768 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6769 int words
6770 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6772 /* Variable sized entities are always passed/returned in memory. */
6773 if (bytes < 0)
6774 return 0;
6776 if (mode != VOIDmode
6777 && targetm.calls.must_pass_in_stack (mode, type))
6778 return 0;
6780 if (type && AGGREGATE_TYPE_P (type))
6782 int i;
6783 tree field;
6784 enum x86_64_reg_class subclasses[MAX_CLASSES];
6786 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6787 if (bytes > 64)
6788 return 0;
6790 for (i = 0; i < words; i++)
6791 classes[i] = X86_64_NO_CLASS;
6793 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6794 signal the memory class, so handle this as a special case. */
6795 if (!words)
6797 classes[0] = X86_64_NO_CLASS;
6798 return 1;
6801 /* Classify each field of record and merge classes. */
6802 switch (TREE_CODE (type))
6804 case RECORD_TYPE:
6805 /* And now merge the fields of structure. */
6806 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6808 if (TREE_CODE (field) == FIELD_DECL)
6810 int num;
6812 if (TREE_TYPE (field) == error_mark_node)
6813 continue;
6815 /* Bitfields are always classified as integer. Handle them
6816 early, since later code would consider them to be
6817 misaligned integers. */
6818 if (DECL_BIT_FIELD (field))
6820 for (i = (int_bit_position (field)
6821 + (bit_offset % 64)) / 8 / 8;
6822 i < ((int_bit_position (field) + (bit_offset % 64))
6823 + tree_to_shwi (DECL_SIZE (field))
6824 + 63) / 8 / 8; i++)
6825 classes[i] =
6826 merge_classes (X86_64_INTEGER_CLASS,
6827 classes[i]);
6829 else
6831 int pos;
6833 type = TREE_TYPE (field);
6835 /* Flexible array member is ignored. */
6836 if (TYPE_MODE (type) == BLKmode
6837 && TREE_CODE (type) == ARRAY_TYPE
6838 && TYPE_SIZE (type) == NULL_TREE
6839 && TYPE_DOMAIN (type) != NULL_TREE
6840 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6841 == NULL_TREE))
6843 static bool warned;
6845 if (!warned && warn_psabi)
6847 warned = true;
6848 inform (input_location,
6849 "the ABI of passing struct with"
6850 " a flexible array member has"
6851 " changed in GCC 4.4");
6853 continue;
6855 num = classify_argument (TYPE_MODE (type), type,
6856 subclasses,
6857 (int_bit_position (field)
6858 + bit_offset) % 512);
6859 if (!num)
6860 return 0;
6861 pos = (int_bit_position (field)
6862 + (bit_offset % 64)) / 8 / 8;
6863 for (i = 0; i < num && (i + pos) < words; i++)
6864 classes[i + pos] =
6865 merge_classes (subclasses[i], classes[i + pos]);
6869 break;
6871 case ARRAY_TYPE:
6872 /* Arrays are handled as small records. */
6874 int num;
6875 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6876 TREE_TYPE (type), subclasses, bit_offset);
6877 if (!num)
6878 return 0;
6880 /* The partial classes are now full classes. */
6881 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6882 subclasses[0] = X86_64_SSE_CLASS;
6883 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6884 && !((bit_offset % 64) == 0 && bytes == 4))
6885 subclasses[0] = X86_64_INTEGER_CLASS;
6887 for (i = 0; i < words; i++)
6888 classes[i] = subclasses[i % num];
6890 break;
6892 case UNION_TYPE:
6893 case QUAL_UNION_TYPE:
6894 /* Unions are similar to RECORD_TYPE but offset is always 0.
6896 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6898 if (TREE_CODE (field) == FIELD_DECL)
6900 int num;
6902 if (TREE_TYPE (field) == error_mark_node)
6903 continue;
6905 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6906 TREE_TYPE (field), subclasses,
6907 bit_offset);
6908 if (!num)
6909 return 0;
6910 for (i = 0; i < num && i < words; i++)
6911 classes[i] = merge_classes (subclasses[i], classes[i]);
6914 break;
6916 default:
6917 gcc_unreachable ();
6920 if (words > 2)
6922 /* When size > 16 bytes, if the first class isn't
6923 X86_64_SSE_CLASS or any of the others isn't
6924 X86_64_SSEUP_CLASS, everything should be passed in
6925 memory. */
6926 if (classes[0] != X86_64_SSE_CLASS)
6927 return 0;
6929 for (i = 1; i < words; i++)
6930 if (classes[i] != X86_64_SSEUP_CLASS)
6931 return 0;
6934 /* Final merger cleanup. */
6935 for (i = 0; i < words; i++)
6937 /* If one class is MEMORY, everything should be passed in
6938 memory. */
6939 if (classes[i] == X86_64_MEMORY_CLASS)
6940 return 0;
6942 /* The X86_64_SSEUP_CLASS should be always preceded by
6943 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6944 if (classes[i] == X86_64_SSEUP_CLASS
6945 && classes[i - 1] != X86_64_SSE_CLASS
6946 && classes[i - 1] != X86_64_SSEUP_CLASS)
6948 /* The first one should never be X86_64_SSEUP_CLASS. */
6949 gcc_assert (i != 0);
6950 classes[i] = X86_64_SSE_CLASS;
6953 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6954 everything should be passed in memory. */
6955 if (classes[i] == X86_64_X87UP_CLASS
6956 && (classes[i - 1] != X86_64_X87_CLASS))
6958 static bool warned;
6960 /* The first one should never be X86_64_X87UP_CLASS. */
6961 gcc_assert (i != 0);
6962 if (!warned && warn_psabi)
6964 warned = true;
6965 inform (input_location,
6966 "the ABI of passing union with long double"
6967 " has changed in GCC 4.4");
6969 return 0;
6972 return words;
6975 /* Compute the alignment needed. We align all types to natural boundaries with
6976 the exception of XFmode, which is aligned to 64 bits. */
6977 if (mode != VOIDmode && mode != BLKmode)
6979 int mode_alignment = GET_MODE_BITSIZE (mode);
6981 if (mode == XFmode)
6982 mode_alignment = 128;
6983 else if (mode == XCmode)
6984 mode_alignment = 256;
6985 if (COMPLEX_MODE_P (mode))
6986 mode_alignment /= 2;
6987 /* Misaligned fields are always returned in memory. */
6988 if (bit_offset % mode_alignment)
6989 return 0;
6992 /* for V1xx modes, just use the base mode */
6993 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6994 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6995 mode = GET_MODE_INNER (mode);
6997 /* Classification of atomic types. */
6998 switch (mode)
7000 case SDmode:
7001 case DDmode:
7002 classes[0] = X86_64_SSE_CLASS;
7003 return 1;
7004 case TDmode:
7005 classes[0] = X86_64_SSE_CLASS;
7006 classes[1] = X86_64_SSEUP_CLASS;
7007 return 2;
7008 case DImode:
7009 case SImode:
7010 case HImode:
7011 case QImode:
7012 case CSImode:
7013 case CHImode:
7014 case CQImode:
7016 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
7018 /* Analyze last 128 bits only. */
7019 size = (size - 1) & 0x7f;
7021 if (size < 32)
7023 classes[0] = X86_64_INTEGERSI_CLASS;
7024 return 1;
7026 else if (size < 64)
7028 classes[0] = X86_64_INTEGER_CLASS;
7029 return 1;
7031 else if (size < 64+32)
7033 classes[0] = X86_64_INTEGER_CLASS;
7034 classes[1] = X86_64_INTEGERSI_CLASS;
7035 return 2;
7037 else if (size < 64+64)
7039 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7040 return 2;
7042 else
7043 gcc_unreachable ();
7045 case CDImode:
7046 case TImode:
7047 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7048 return 2;
7049 case COImode:
7050 case OImode:
7051 /* OImode shouldn't be used directly. */
7052 gcc_unreachable ();
7053 case CTImode:
7054 return 0;
7055 case SFmode:
7056 if (!(bit_offset % 64))
7057 classes[0] = X86_64_SSESF_CLASS;
7058 else
7059 classes[0] = X86_64_SSE_CLASS;
7060 return 1;
7061 case DFmode:
7062 classes[0] = X86_64_SSEDF_CLASS;
7063 return 1;
7064 case XFmode:
7065 classes[0] = X86_64_X87_CLASS;
7066 classes[1] = X86_64_X87UP_CLASS;
7067 return 2;
7068 case TFmode:
7069 classes[0] = X86_64_SSE_CLASS;
7070 classes[1] = X86_64_SSEUP_CLASS;
7071 return 2;
7072 case SCmode:
7073 classes[0] = X86_64_SSE_CLASS;
7074 if (!(bit_offset % 64))
7075 return 1;
7076 else
7078 static bool warned;
7080 if (!warned && warn_psabi)
7082 warned = true;
7083 inform (input_location,
7084 "the ABI of passing structure with complex float"
7085 " member has changed in GCC 4.4");
7087 classes[1] = X86_64_SSESF_CLASS;
7088 return 2;
7090 case DCmode:
7091 classes[0] = X86_64_SSEDF_CLASS;
7092 classes[1] = X86_64_SSEDF_CLASS;
7093 return 2;
7094 case XCmode:
7095 classes[0] = X86_64_COMPLEX_X87_CLASS;
7096 return 1;
7097 case TCmode:
7098 /* This mode is larger than 16 bytes. */
7099 return 0;
7100 case V8SFmode:
7101 case V8SImode:
7102 case V32QImode:
7103 case V16HImode:
7104 case V4DFmode:
7105 case V4DImode:
7106 classes[0] = X86_64_SSE_CLASS;
7107 classes[1] = X86_64_SSEUP_CLASS;
7108 classes[2] = X86_64_SSEUP_CLASS;
7109 classes[3] = X86_64_SSEUP_CLASS;
7110 return 4;
7111 case V8DFmode:
7112 case V16SFmode:
7113 case V8DImode:
7114 case V16SImode:
7115 case V32HImode:
7116 case V64QImode:
7117 classes[0] = X86_64_SSE_CLASS;
7118 classes[1] = X86_64_SSEUP_CLASS;
7119 classes[2] = X86_64_SSEUP_CLASS;
7120 classes[3] = X86_64_SSEUP_CLASS;
7121 classes[4] = X86_64_SSEUP_CLASS;
7122 classes[5] = X86_64_SSEUP_CLASS;
7123 classes[6] = X86_64_SSEUP_CLASS;
7124 classes[7] = X86_64_SSEUP_CLASS;
7125 return 8;
7126 case V4SFmode:
7127 case V4SImode:
7128 case V16QImode:
7129 case V8HImode:
7130 case V2DFmode:
7131 case V2DImode:
7132 classes[0] = X86_64_SSE_CLASS;
7133 classes[1] = X86_64_SSEUP_CLASS;
7134 return 2;
7135 case V1TImode:
7136 case V1DImode:
7137 case V2SFmode:
7138 case V2SImode:
7139 case V4HImode:
7140 case V8QImode:
7141 classes[0] = X86_64_SSE_CLASS;
7142 return 1;
7143 case BLKmode:
7144 case VOIDmode:
7145 return 0;
7146 default:
7147 gcc_assert (VECTOR_MODE_P (mode));
7149 if (bytes > 16)
7150 return 0;
7152 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7154 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7155 classes[0] = X86_64_INTEGERSI_CLASS;
7156 else
7157 classes[0] = X86_64_INTEGER_CLASS;
7158 classes[1] = X86_64_INTEGER_CLASS;
7159 return 1 + (bytes > 8);
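/* A worked example of the classification above:

     struct s { double d; long l; };   // 16 bytes, two eightbytes

   classifies as classes[0] == X86_64_SSEDF_CLASS and
   classes[1] == X86_64_INTEGER_CLASS, so the struct is passed in one SSE
   register and one integer register.  */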
7163 /* Examine the argument and set the number of registers required in each
7164 class. Return true iff the parameter should be passed in memory. */
7166 static bool
7167 examine_argument (machine_mode mode, const_tree type, int in_return,
7168 int *int_nregs, int *sse_nregs)
7170 enum x86_64_reg_class regclass[MAX_CLASSES];
7171 int n = classify_argument (mode, type, regclass, 0);
7173 *int_nregs = 0;
7174 *sse_nregs = 0;
7176 if (!n)
7177 return true;
7178 for (n--; n >= 0; n--)
7179 switch (regclass[n])
7181 case X86_64_INTEGER_CLASS:
7182 case X86_64_INTEGERSI_CLASS:
7183 (*int_nregs)++;
7184 break;
7185 case X86_64_SSE_CLASS:
7186 case X86_64_SSESF_CLASS:
7187 case X86_64_SSEDF_CLASS:
7188 (*sse_nregs)++;
7189 break;
7190 case X86_64_NO_CLASS:
7191 case X86_64_SSEUP_CLASS:
7192 break;
7193 case X86_64_X87_CLASS:
7194 case X86_64_X87UP_CLASS:
7195 case X86_64_COMPLEX_X87_CLASS:
7196 if (!in_return)
7197 return true;
7198 break;
7199 case X86_64_MEMORY_CLASS:
7200 gcc_unreachable ();
7203 return false;
7206 /* Construct container for the argument used by GCC interface. See
7207 FUNCTION_ARG for the detailed description. */
7209 static rtx
7210 construct_container (machine_mode mode, machine_mode orig_mode,
7211 const_tree type, int in_return, int nintregs, int nsseregs,
7212 const int *intreg, int sse_regno)
7214 /* The following variables hold the static issued_error state. */
7215 static bool issued_sse_arg_error;
7216 static bool issued_sse_ret_error;
7217 static bool issued_x87_ret_error;
7219 machine_mode tmpmode;
7220 int bytes =
7221 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7222 enum x86_64_reg_class regclass[MAX_CLASSES];
7223 int n;
7224 int i;
7225 int nexps = 0;
7226 int needed_sseregs, needed_intregs;
7227 rtx exp[MAX_CLASSES];
7228 rtx ret;
7230 n = classify_argument (mode, type, regclass, 0);
7231 if (!n)
7232 return NULL;
7233 if (examine_argument (mode, type, in_return, &needed_intregs,
7234 &needed_sseregs))
7235 return NULL;
7236 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7237 return NULL;
7239 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7240 some less clueful developer tries to use floating-point anyway. */
7241 if (needed_sseregs && !TARGET_SSE)
7243 if (in_return)
7245 if (!issued_sse_ret_error)
7247 error ("SSE register return with SSE disabled");
7248 issued_sse_ret_error = true;
7251 else if (!issued_sse_arg_error)
7253 error ("SSE register argument with SSE disabled");
7254 issued_sse_arg_error = true;
7256 return NULL;
7259 /* Likewise, error if the ABI requires us to return values in the
7260 x87 registers and the user specified -mno-80387. */
7261 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7262 for (i = 0; i < n; i++)
7263 if (regclass[i] == X86_64_X87_CLASS
7264 || regclass[i] == X86_64_X87UP_CLASS
7265 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7267 if (!issued_x87_ret_error)
7269 error ("x87 register return with x87 disabled");
7270 issued_x87_ret_error = true;
7272 return NULL;
7275 /* First construct the simple cases. Avoid SCmode, since we want to use a
7276 single register to pass this type. */
7277 if (n == 1 && mode != SCmode)
7278 switch (regclass[0])
7280 case X86_64_INTEGER_CLASS:
7281 case X86_64_INTEGERSI_CLASS:
7282 return gen_rtx_REG (mode, intreg[0]);
7283 case X86_64_SSE_CLASS:
7284 case X86_64_SSESF_CLASS:
7285 case X86_64_SSEDF_CLASS:
7286 if (mode != BLKmode)
7287 return gen_reg_or_parallel (mode, orig_mode,
7288 SSE_REGNO (sse_regno));
7289 break;
7290 case X86_64_X87_CLASS:
7291 case X86_64_COMPLEX_X87_CLASS:
7292 return gen_rtx_REG (mode, FIRST_STACK_REG);
7293 case X86_64_NO_CLASS:
7294 /* Zero sized array, struct or class. */
7295 return NULL;
7296 default:
7297 gcc_unreachable ();
7299 if (n == 2
7300 && regclass[0] == X86_64_SSE_CLASS
7301 && regclass[1] == X86_64_SSEUP_CLASS
7302 && mode != BLKmode)
7303 return gen_reg_or_parallel (mode, orig_mode,
7304 SSE_REGNO (sse_regno));
7305 if (n == 4
7306 && regclass[0] == X86_64_SSE_CLASS
7307 && regclass[1] == X86_64_SSEUP_CLASS
7308 && regclass[2] == X86_64_SSEUP_CLASS
7309 && regclass[3] == X86_64_SSEUP_CLASS
7310 && mode != BLKmode)
7311 return gen_reg_or_parallel (mode, orig_mode,
7312 SSE_REGNO (sse_regno));
7313 if (n == 8
7314 && regclass[0] == X86_64_SSE_CLASS
7315 && regclass[1] == X86_64_SSEUP_CLASS
7316 && regclass[2] == X86_64_SSEUP_CLASS
7317 && regclass[3] == X86_64_SSEUP_CLASS
7318 && regclass[4] == X86_64_SSEUP_CLASS
7319 && regclass[5] == X86_64_SSEUP_CLASS
7320 && regclass[6] == X86_64_SSEUP_CLASS
7321 && regclass[7] == X86_64_SSEUP_CLASS
7322 && mode != BLKmode)
7323 return gen_reg_or_parallel (mode, orig_mode,
7324 SSE_REGNO (sse_regno));
7325 if (n == 2
7326 && regclass[0] == X86_64_X87_CLASS
7327 && regclass[1] == X86_64_X87UP_CLASS)
7328 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7330 if (n == 2
7331 && regclass[0] == X86_64_INTEGER_CLASS
7332 && regclass[1] == X86_64_INTEGER_CLASS
7333 && (mode == CDImode || mode == TImode)
7334 && intreg[0] + 1 == intreg[1])
7335 return gen_rtx_REG (mode, intreg[0]);
7337 /* Otherwise figure out the entries of the PARALLEL. */
7338 for (i = 0; i < n; i++)
7340 int pos;
7342 switch (regclass[i])
7344 case X86_64_NO_CLASS:
7345 break;
7346 case X86_64_INTEGER_CLASS:
7347 case X86_64_INTEGERSI_CLASS:
7348 /* Merge TImodes on aligned occasions here too. */
7349 if (i * 8 + 8 > bytes)
7350 tmpmode
7351 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7352 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7353 tmpmode = SImode;
7354 else
7355 tmpmode = DImode;
7356 /* We've requested a size we
7357 don't have a mode for. Use DImode. */
7358 if (tmpmode == BLKmode)
7359 tmpmode = DImode;
7360 exp [nexps++]
7361 = gen_rtx_EXPR_LIST (VOIDmode,
7362 gen_rtx_REG (tmpmode, *intreg),
7363 GEN_INT (i*8));
7364 intreg++;
7365 break;
7366 case X86_64_SSESF_CLASS:
7367 exp [nexps++]
7368 = gen_rtx_EXPR_LIST (VOIDmode,
7369 gen_rtx_REG (SFmode,
7370 SSE_REGNO (sse_regno)),
7371 GEN_INT (i*8));
7372 sse_regno++;
7373 break;
7374 case X86_64_SSEDF_CLASS:
7375 exp [nexps++]
7376 = gen_rtx_EXPR_LIST (VOIDmode,
7377 gen_rtx_REG (DFmode,
7378 SSE_REGNO (sse_regno)),
7379 GEN_INT (i*8));
7380 sse_regno++;
7381 break;
7382 case X86_64_SSE_CLASS:
7383 pos = i;
7384 switch (n)
7386 case 1:
7387 tmpmode = DImode;
7388 break;
7389 case 2:
7390 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7392 tmpmode = TImode;
7393 i++;
7395 else
7396 tmpmode = DImode;
7397 break;
7398 case 4:
7399 gcc_assert (i == 0
7400 && regclass[1] == X86_64_SSEUP_CLASS
7401 && regclass[2] == X86_64_SSEUP_CLASS
7402 && regclass[3] == X86_64_SSEUP_CLASS);
7403 tmpmode = OImode;
7404 i += 3;
7405 break;
7406 case 8:
7407 gcc_assert (i == 0
7408 && regclass[1] == X86_64_SSEUP_CLASS
7409 && regclass[2] == X86_64_SSEUP_CLASS
7410 && regclass[3] == X86_64_SSEUP_CLASS
7411 && regclass[4] == X86_64_SSEUP_CLASS
7412 && regclass[5] == X86_64_SSEUP_CLASS
7413 && regclass[6] == X86_64_SSEUP_CLASS
7414 && regclass[7] == X86_64_SSEUP_CLASS);
7415 tmpmode = XImode;
7416 i += 7;
7417 break;
7418 default:
7419 gcc_unreachable ();
7421 exp [nexps++]
7422 = gen_rtx_EXPR_LIST (VOIDmode,
7423 gen_rtx_REG (tmpmode,
7424 SSE_REGNO (sse_regno)),
7425 GEN_INT (pos*8));
7426 sse_regno++;
7427 break;
7428 default:
7429 gcc_unreachable ();
7433 /* Empty aligned struct, union or class. */
7434 if (nexps == 0)
7435 return NULL;
7437 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7438 for (i = 0; i < nexps; i++)
7439 XVECEXP (ret, 0, i) = exp [i];
7440 return ret;
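/* Continuing the struct { double d; long l; } example, when such a struct
   is the first argument the PARALLEL built here looks roughly like

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   i.e. the double goes in %xmm0 and the long in %rdi.  */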
7443 /* Update the data in CUM to advance over an argument of mode MODE
7444 and data type TYPE. (TYPE is null for libcalls where that information
7445 may not be available.)
7447 Return the number of integer registers advanced over. */
7449 static int
7450 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7451 const_tree type, HOST_WIDE_INT bytes,
7452 HOST_WIDE_INT words)
7454 int res = 0;
7456 switch (mode)
7458 default:
7459 break;
7461 case BLKmode:
7462 if (bytes < 0)
7463 break;
7464 /* FALLTHRU */
7466 case DImode:
7467 case SImode:
7468 case HImode:
7469 case QImode:
7470 cum->words += words;
7471 cum->nregs -= words;
7472 cum->regno += words;
7473 if (cum->nregs >= 0)
7474 res = words;
7475 if (cum->nregs <= 0)
7477 cum->nregs = 0;
7478 cum->regno = 0;
7480 break;
7482 case OImode:
7483 /* OImode shouldn't be used directly. */
7484 gcc_unreachable ();
7486 case DFmode:
7487 if (cum->float_in_sse < 2)
7488 break;
7489 case SFmode:
7490 if (cum->float_in_sse < 1)
7491 break;
7492 /* FALLTHRU */
7494 case V8SFmode:
7495 case V8SImode:
7496 case V64QImode:
7497 case V32HImode:
7498 case V16SImode:
7499 case V8DImode:
7500 case V16SFmode:
7501 case V8DFmode:
7502 case V32QImode:
7503 case V16HImode:
7504 case V4DFmode:
7505 case V4DImode:
7506 case TImode:
7507 case V16QImode:
7508 case V8HImode:
7509 case V4SImode:
7510 case V2DImode:
7511 case V4SFmode:
7512 case V2DFmode:
7513 if (!type || !AGGREGATE_TYPE_P (type))
7515 cum->sse_words += words;
7516 cum->sse_nregs -= 1;
7517 cum->sse_regno += 1;
7518 if (cum->sse_nregs <= 0)
7520 cum->sse_nregs = 0;
7521 cum->sse_regno = 0;
7524 break;
7526 case V8QImode:
7527 case V4HImode:
7528 case V2SImode:
7529 case V2SFmode:
7530 case V1TImode:
7531 case V1DImode:
7532 if (!type || !AGGREGATE_TYPE_P (type))
7534 cum->mmx_words += words;
7535 cum->mmx_nregs -= 1;
7536 cum->mmx_regno += 1;
7537 if (cum->mmx_nregs <= 0)
7539 cum->mmx_nregs = 0;
7540 cum->mmx_regno = 0;
7543 break;
7546 return res;
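/* Likewise for the 64-bit SysV ABI: advance CUM over an argument of mode
   MODE and type TYPE occupying WORDS words.  Return the number of integer
   registers consumed, or 0 if the argument goes on the stack.  */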
7549 static int
7550 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7551 const_tree type, HOST_WIDE_INT words, bool named)
7553 int int_nregs, sse_nregs;
7555 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
7556 if (!named && (VALID_AVX512F_REG_MODE (mode)
7557 || VALID_AVX256_REG_MODE (mode)))
7558 return 0;
7560 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7561 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7563 cum->nregs -= int_nregs;
7564 cum->sse_nregs -= sse_nregs;
7565 cum->regno += int_nregs;
7566 cum->sse_regno += sse_nregs;
7567 return int_nregs;
7569 else
7571 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7572 cum->words = (cum->words + align - 1) & ~(align - 1);
7573 cum->words += words;
7574 return 0;
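/* Likewise for the 64-bit MS ABI, where every by-value argument occupies
   exactly one slot.  Return 1 if an integer register was consumed.  */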
7578 static int
7579 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7580 HOST_WIDE_INT words)
7582 /* Otherwise, this should be passed indirect. */
7583 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7585 cum->words += words;
7586 if (cum->nregs > 0)
7588 cum->nregs -= 1;
7589 cum->regno += 1;
7590 return 1;
7592 return 0;
7595 /* Update the data in CUM to advance over an argument of mode MODE and
7596 data type TYPE. (TYPE is null for libcalls where that information
7597 may not be available.) */
7599 static void
7600 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7601 const_tree type, bool named)
7603 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7604 HOST_WIDE_INT bytes, words;
7605 int nregs;
7607 if (mode == BLKmode)
7608 bytes = int_size_in_bytes (type);
7609 else
7610 bytes = GET_MODE_SIZE (mode);
7611 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7613 if (type)
7614 mode = type_natural_mode (type, NULL, false);
7616 if ((type && POINTER_BOUNDS_TYPE_P (type))
7617 || POINTER_BOUNDS_MODE_P (mode))
7619 /* If we pass bounds in the Bounds Table then just update the remaining bounds count. */
7620 if (cum->bnds_in_bt)
7622 cum->bnds_in_bt--;
7623 return;
7626 /* Update the remaining number of bounds to force. */
7627 if (cum->force_bnd_pass)
7628 cum->force_bnd_pass--;
7630 cum->bnd_regno++;
7632 return;
7635 /* The first arg not going to Bounds Tables resets this counter. */
7636 cum->bnds_in_bt = 0;
7637 /* For unnamed args we always pass bounds to avoid a bounds mess when
7638 the passed and received types do not match. If bounds do not follow an
7639 unnamed arg, still pretend the required number of bounds were passed. */
7640 if (cum->force_bnd_pass)
7642 cum->bnd_regno += cum->force_bnd_pass;
7643 cum->force_bnd_pass = 0;
7646 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7647 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7648 else if (TARGET_64BIT)
7649 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7650 else
7651 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7653 /* For stdarg we expect bounds to be passed for each value passed
7654 in register. */
7655 if (cum->stdarg)
7656 cum->force_bnd_pass = nregs;
7657 /* For pointers passed in memory we expect bounds passed in Bounds
7658 Table. */
7659 if (!nregs)
7660 cum->bnds_in_bt = chkp_type_bounds_count (type);
7663 /* Define where to put the arguments to a function.
7664 Value is zero to push the argument on the stack,
7665 or a hard register in which to store the argument.
7667 MODE is the argument's machine mode.
7668 TYPE is the data type of the argument (as a tree).
7669 This is null for libcalls where that information may
7670 not be available.
7671 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7672 the preceding args and about the function being called.
7673 NAMED is nonzero if this argument is a named parameter
7674 (otherwise it is an extra parameter matching an ellipsis). */
7676 static rtx
7677 function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7678 machine_mode orig_mode, const_tree type,
7679 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7681 /* Avoid the AL settings for the Unix64 ABI. */
7682 if (mode == VOIDmode)
7683 return constm1_rtx;
7685 switch (mode)
7687 default:
7688 break;
7690 case BLKmode:
7691 if (bytes < 0)
7692 break;
7693 /* FALLTHRU */
7694 case DImode:
7695 case SImode:
7696 case HImode:
7697 case QImode:
7698 if (words <= cum->nregs)
7700 int regno = cum->regno;
7702 /* Fastcall allocates the first two DWORD (SImode) or
7703 smaller arguments to ECX and EDX if they aren't
7704 aggregate types. */
7705 if (cum->fastcall)
7707 if (mode == BLKmode
7708 || mode == DImode
7709 || (type && AGGREGATE_TYPE_P (type)))
7710 break;
7712 /* ECX, not EAX, is the first allocated register. */
7713 if (regno == AX_REG)
7714 regno = CX_REG;
7716 return gen_rtx_REG (mode, regno);
7718 break;
7720 case DFmode:
7721 if (cum->float_in_sse < 2)
7722 break;
7723 case SFmode:
7724 if (cum->float_in_sse < 1)
7725 break;
7726 /* FALLTHRU */
7727 case TImode:
7728 /* In 32bit, we pass TImode in xmm registers. */
7729 case V16QImode:
7730 case V8HImode:
7731 case V4SImode:
7732 case V2DImode:
7733 case V4SFmode:
7734 case V2DFmode:
7735 if (!type || !AGGREGATE_TYPE_P (type))
7737 if (cum->sse_nregs)
7738 return gen_reg_or_parallel (mode, orig_mode,
7739 cum->sse_regno + FIRST_SSE_REG);
7741 break;
7743 case OImode:
7744 case XImode:
7745 /* OImode and XImode shouldn't be used directly. */
7746 gcc_unreachable ();
7748 case V64QImode:
7749 case V32HImode:
7750 case V16SImode:
7751 case V8DImode:
7752 case V16SFmode:
7753 case V8DFmode:
7754 case V8SFmode:
7755 case V8SImode:
7756 case V32QImode:
7757 case V16HImode:
7758 case V4DFmode:
7759 case V4DImode:
7760 if (!type || !AGGREGATE_TYPE_P (type))
7762 if (cum->sse_nregs)
7763 return gen_reg_or_parallel (mode, orig_mode,
7764 cum->sse_regno + FIRST_SSE_REG);
7766 break;
7768 case V8QImode:
7769 case V4HImode:
7770 case V2SImode:
7771 case V2SFmode:
7772 case V1TImode:
7773 case V1DImode:
7774 if (!type || !AGGREGATE_TYPE_P (type))
7776 if (cum->mmx_nregs)
7777 return gen_reg_or_parallel (mode, orig_mode,
7778 cum->mmx_regno + FIRST_MMX_REG);
7780 break;
7783 return NULL_RTX;
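/* Likewise for the 64-bit SysV ABI.  Return the register, or PARALLEL of
   registers, in which to pass the argument, or NULL to pass it on the
   stack.  */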
7786 static rtx
7787 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7788 machine_mode orig_mode, const_tree type, bool named)
7790 /* Handle a hidden AL argument containing the number of registers
7791 for varargs x86-64 functions. */
7792 if (mode == VOIDmode)
7793 return GEN_INT (cum->maybe_vaarg
7794 ? (cum->sse_nregs < 0
7795 ? X86_64_SSE_REGPARM_MAX
7796 : cum->sse_regno)
7797 : -1);
7799 switch (mode)
7801 default:
7802 break;
7804 case V8SFmode:
7805 case V8SImode:
7806 case V32QImode:
7807 case V16HImode:
7808 case V4DFmode:
7809 case V4DImode:
7810 case V16SFmode:
7811 case V16SImode:
7812 case V64QImode:
7813 case V32HImode:
7814 case V8DFmode:
7815 case V8DImode:
7816 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7817 if (!named)
7818 return NULL;
7819 break;
7822 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7823 cum->sse_nregs,
7824 &x86_64_int_parameter_registers [cum->regno],
7825 cum->sse_regno);
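/* Likewise for the 64-bit MS ABI.  BYTES is the argument size, used to
   pick an integer mode for aggregates passed by value.  */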
7828 static rtx
7829 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7830 machine_mode orig_mode, bool named,
7831 HOST_WIDE_INT bytes)
7833 unsigned int regno;
7835 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7836 We use a value of -2 to specify that the current function call is MS ABI. */
7837 if (mode == VOIDmode)
7838 return GEN_INT (-2);
7840 /* If we've run out of registers, it goes on the stack. */
7841 if (cum->nregs == 0)
7842 return NULL_RTX;
7844 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7846 /* Only floating point modes are passed in anything but integer regs. */
7847 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7849 if (named)
7850 regno = cum->regno + FIRST_SSE_REG;
7851 else
7853 rtx t1, t2;
7855 /* Unnamed floating parameters are passed in both the
7856 SSE and integer registers. */
7857 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7858 t2 = gen_rtx_REG (mode, regno);
7859 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7860 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7861 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7864 /* Handle aggregate types passed in registers. */
7865 if (orig_mode == BLKmode)
7867 if (bytes > 0 && bytes <= 8)
7868 mode = (bytes > 4 ? DImode : SImode);
7869 if (mode == BLKmode)
7870 mode = DImode;
7873 return gen_reg_or_parallel (mode, orig_mode, regno);
7876 /* Return where to put the arguments to a function.
7877 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7879 MODE is the argument's machine mode. TYPE is the data type of the
7880 argument. It is null for libcalls where that information may not be
7881 available. CUM gives information about the preceding args and about
7882 the function being called. NAMED is nonzero if this argument is a
7883 named parameter (otherwise it is an extra parameter matching an
7884 ellipsis). */
7886 static rtx
7887 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7888 const_tree type, bool named)
7890 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7891 machine_mode mode = omode;
7892 HOST_WIDE_INT bytes, words;
7893 rtx arg;
7895 /* All pointer bounds arguments are handled separately here. */
7896 if ((type && POINTER_BOUNDS_TYPE_P (type))
7897 || POINTER_BOUNDS_MODE_P (mode))
7899 /* Return NULL if bounds are forced to go in Bounds Table. */
7900 if (cum->bnds_in_bt)
7901 arg = NULL;
7902 /* Return the next available bound reg if any. */
7903 else if (cum->bnd_regno <= LAST_BND_REG)
7904 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7905 /* Return the next special slot number otherwise. */
7906 else
7907 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7909 return arg;
7912 if (mode == BLKmode)
7913 bytes = int_size_in_bytes (type);
7914 else
7915 bytes = GET_MODE_SIZE (mode);
7916 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7918 /* To simplify the code below, represent vector types with a vector mode
7919 even if MMX/SSE are not active. */
7920 if (type && TREE_CODE (type) == VECTOR_TYPE)
7921 mode = type_natural_mode (type, cum, false);
7923 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7924 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7925 else if (TARGET_64BIT)
7926 arg = function_arg_64 (cum, mode, omode, type, named);
7927 else
7928 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7930 return arg;
7933 /* A C expression that indicates when an argument must be passed by
7934 reference. If nonzero for an argument, a copy of that argument is
7935 made in memory and a pointer to the argument is passed instead of
7936 the argument itself. The pointer is passed in whatever way is
7937 appropriate for passing a pointer to that type. */
7939 static bool
7940 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7941 const_tree type, bool)
7943 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7945 /* Bounds are never passed by reference. */
7946 if ((type && POINTER_BOUNDS_TYPE_P (type))
7947 || POINTER_BOUNDS_MODE_P (mode))
7948 return false;
7950 /* See Windows x64 Software Convention. */
7951 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7953 int msize = (int) GET_MODE_SIZE (mode);
7954 if (type)
7956 /* Arrays are passed by reference. */
7957 if (TREE_CODE (type) == ARRAY_TYPE)
7958 return true;
7960 if (AGGREGATE_TYPE_P (type))
7962 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7963 are passed by reference. */
7964 msize = int_size_in_bytes (type);
7968 /* __m128 is passed by reference. */
7969 switch (msize) {
7970 case 1: case 2: case 4: case 8:
7971 break;
7972 default:
7973 return true;
7976 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7977 return true;
7979 return false;
7982 /* Return true when TYPE should be 128bit aligned for 32bit argument
7983 passing ABI. XXX: This function is obsolete and is only used for
7984 checking psABI compatibility with previous versions of GCC. */
7986 static bool
7987 ix86_compat_aligned_value_p (const_tree type)
7989 machine_mode mode = TYPE_MODE (type);
7990 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7991 || mode == TDmode
7992 || mode == TFmode
7993 || mode == TCmode)
7994 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7995 return true;
7996 if (TYPE_ALIGN (type) < 128)
7997 return false;
7999 if (AGGREGATE_TYPE_P (type))
8001 /* Walk the aggregates recursively. */
8002 switch (TREE_CODE (type))
8004 case RECORD_TYPE:
8005 case UNION_TYPE:
8006 case QUAL_UNION_TYPE:
8008 tree field;
8010 /* Walk all the structure fields. */
8011 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8013 if (TREE_CODE (field) == FIELD_DECL
8014 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
8015 return true;
8017 break;
8020 case ARRAY_TYPE:
8021 /* Just for use if some languages pass arrays by value. */
8022 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
8023 return true;
8024 break;
8026 default:
8027 gcc_unreachable ();
8030 return false;
8033 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8034 XXX: This function is obsolete and is only used for checking psABI
8035 compatibility with previous versions of GCC. */
8037 static unsigned int
8038 ix86_compat_function_arg_boundary (machine_mode mode,
8039 const_tree type, unsigned int align)
8041 /* In 32bit, only _Decimal128 and __float128 are aligned to their
8042 natural boundaries. */
8043 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8045 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
8046 make an exception for SSE modes since these require 128bit
8047 alignment.
8049 The handling here differs from field_alignment. ICC aligns MMX
8050 arguments to 4 byte boundaries, while structure fields are aligned
8051 to 8 byte boundaries. */
8052 if (!type)
8054 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8055 align = PARM_BOUNDARY;
8057 else
8059 if (!ix86_compat_aligned_value_p (type))
8060 align = PARM_BOUNDARY;
8063 if (align > BIGGEST_ALIGNMENT)
8064 align = BIGGEST_ALIGNMENT;
8065 return align;
8068 /* Return true when TYPE should be 128bit aligned for 32bit argument
8069 passing ABI. */
8071 static bool
8072 ix86_contains_aligned_value_p (const_tree type)
8074 machine_mode mode = TYPE_MODE (type);
8076 if (mode == XFmode || mode == XCmode)
8077 return false;
8079 if (TYPE_ALIGN (type) < 128)
8080 return false;
8082 if (AGGREGATE_TYPE_P (type))
8084 /* Walk the aggregates recursively. */
8085 switch (TREE_CODE (type))
8087 case RECORD_TYPE:
8088 case UNION_TYPE:
8089 case QUAL_UNION_TYPE:
8091 tree field;
8093 /* Walk all the structure fields. */
8094 for (field = TYPE_FIELDS (type);
8095 field;
8096 field = DECL_CHAIN (field))
8098 if (TREE_CODE (field) == FIELD_DECL
8099 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8100 return true;
8102 break;
8105 case ARRAY_TYPE:
8106 /* Just for use if some languages pass arrays by value. */
8107 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8108 return true;
8109 break;
8111 default:
8112 gcc_unreachable ();
8115 else
8116 return TYPE_ALIGN (type) >= 128;
8118 return false;
8121 /* Gives the alignment boundary, in bits, of an argument with the
8122 specified mode and type. */
8124 static unsigned int
8125 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8127 unsigned int align;
8128 if (type)
8130 /* Since the main variant type is used for the call, convert TYPE
8131 to its main variant. */
8132 type = TYPE_MAIN_VARIANT (type);
8133 align = TYPE_ALIGN (type);
8135 else
8136 align = GET_MODE_ALIGNMENT (mode);
8137 if (align < PARM_BOUNDARY)
8138 align = PARM_BOUNDARY;
8139 else
8141 static bool warned;
8142 unsigned int saved_align = align;
8144 if (!TARGET_64BIT)
8146 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8147 if (!type)
8149 if (mode == XFmode || mode == XCmode)
8150 align = PARM_BOUNDARY;
8152 else if (!ix86_contains_aligned_value_p (type))
8153 align = PARM_BOUNDARY;
8155 if (align < 128)
8156 align = PARM_BOUNDARY;
8159 if (warn_psabi
8160 && !warned
8161 && align != ix86_compat_function_arg_boundary (mode, type,
8162 saved_align))
8164 warned = true;
8165 inform (input_location,
8166 "The ABI for passing parameters with %d-byte"
8167 " alignment has changed in GCC 4.6",
8168 align / BITS_PER_UNIT);
8172 return align;
8175 /* Return true if N is a possible register number of function value. */
8177 static bool
8178 ix86_function_value_regno_p (const unsigned int regno)
8180 switch (regno)
8182 case AX_REG:
8183 return true;
8184 case DX_REG:
8185 return (!TARGET_64BIT || ix86_abi != MS_ABI);
8186 case DI_REG:
8187 case SI_REG:
8188 return TARGET_64BIT && ix86_abi != MS_ABI;
8190 case FIRST_BND_REG:
8191 return chkp_function_instrumented_p (current_function_decl);
8193 /* Complex values are returned in %st(0)/%st(1) pair. */
8194 case ST0_REG:
8195 case ST1_REG:
8196 /* TODO: The function should depend on current function ABI but
8197 builtins.c would need updating then. Therefore we use the
8198 default ABI. */
8199 if (TARGET_64BIT && ix86_abi == MS_ABI)
8200 return false;
8201 return TARGET_FLOAT_RETURNS_IN_80387;
8203 /* Complex values are returned in %xmm0/%xmm1 pair. */
8204 case XMM0_REG:
8205 case XMM1_REG:
8206 return TARGET_SSE;
8208 case MM0_REG:
8209 if (TARGET_MACHO || TARGET_64BIT)
8210 return false;
8211 return TARGET_MMX;
8214 return false;
8217 /* Define how to find the value returned by a function.
8218 VALTYPE is the data type of the value (as a tree).
8219 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8220 otherwise, FUNC is 0. */
8222 static rtx
8223 function_value_32 (machine_mode orig_mode, machine_mode mode,
8224 const_tree fntype, const_tree fn)
8226 unsigned int regno;
8228 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8229 we normally prevent this case when mmx is not available. However
8230 some ABIs may require the result to be returned like DImode. */
8231 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8232 regno = FIRST_MMX_REG;
8234 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8235 we prevent this case when sse is not available. However some ABIs
8236 may require the result to be returned like integer TImode. */
8237 else if (mode == TImode
8238 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8239 regno = FIRST_SSE_REG;
8241 /* 32-byte vector modes in %ymm0. */
8242 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8243 regno = FIRST_SSE_REG;
8245 /* 64-byte vector modes in %zmm0. */
8246 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8247 regno = FIRST_SSE_REG;
8249 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8250 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8251 regno = FIRST_FLOAT_REG;
8252 else
8253 /* Most things go in %eax. */
8254 regno = AX_REG;
8256 /* Override FP return register with %xmm0 for local functions when
8257 SSE math is enabled or for functions with sseregparm attribute. */
8258 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8260 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8261 if ((sse_level >= 1 && mode == SFmode)
8262 || (sse_level == 2 && mode == DFmode))
8263 regno = FIRST_SSE_REG;
8266 /* OImode shouldn't be used directly. */
8267 gcc_assert (mode != OImode);
8269 return gen_rtx_REG (orig_mode, regno);
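/* Likewise for the 64-bit SysV ABI.  VALTYPE is NULL for libcalls.  */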
8272 static rtx
8273 function_value_64 (machine_mode orig_mode, machine_mode mode,
8274 const_tree valtype)
8276 rtx ret;
8278 /* Handle libcalls, which don't provide a type node. */
8279 if (valtype == NULL)
8281 unsigned int regno;
8283 switch (mode)
8285 case SFmode:
8286 case SCmode:
8287 case DFmode:
8288 case DCmode:
8289 case TFmode:
8290 case SDmode:
8291 case DDmode:
8292 case TDmode:
8293 regno = FIRST_SSE_REG;
8294 break;
8295 case XFmode:
8296 case XCmode:
8297 regno = FIRST_FLOAT_REG;
8298 break;
8299 case TCmode:
8300 return NULL;
8301 default:
8302 regno = AX_REG;
8305 return gen_rtx_REG (mode, regno);
8307 else if (POINTER_TYPE_P (valtype))
8309 /* Pointers are always returned in word_mode. */
8310 mode = word_mode;
8313 ret = construct_container (mode, orig_mode, valtype, 1,
8314 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8315 x86_64_int_return_registers, 0);
8317 /* For zero sized structures, construct_container returns NULL, but we
8318 need to keep the rest of the compiler happy by returning a meaningful value. */
8319 if (!ret)
8320 ret = gen_rtx_REG (orig_mode, AX_REG);
8322 return ret;
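/* Likewise for the 64-bit MS ABI, which returns values only in %rax or
   %xmm0.  */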
8325 static rtx
8326 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8327 const_tree valtype)
8329 unsigned int regno = AX_REG;
8331 if (TARGET_SSE)
8333 switch (GET_MODE_SIZE (mode))
8335 case 16:
8336 if (valtype != NULL_TREE
8337 && !VECTOR_INTEGER_TYPE_P (valtype)
8339 && !INTEGRAL_TYPE_P (valtype)
8340 && !VECTOR_FLOAT_TYPE_P (valtype))
8341 break;
8342 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8343 && !COMPLEX_MODE_P (mode))
8344 regno = FIRST_SSE_REG;
8345 break;
8346 case 8:
8347 case 4:
8348 if (mode == SFmode || mode == DFmode)
8349 regno = FIRST_SSE_REG;
8350 break;
8351 default:
8352 break;
8355 return gen_rtx_REG (orig_mode, regno);
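/* Common worker deciding which of the per-ABI return value routines above
   applies.  FNTYPE_OR_DECL is either a FUNCTION_DECL, a function type, or
   NULL.  */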
8358 static rtx
8359 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8360 machine_mode orig_mode, machine_mode mode)
8362 const_tree fn, fntype;
8364 fn = NULL_TREE;
8365 if (fntype_or_decl && DECL_P (fntype_or_decl))
8366 fn = fntype_or_decl;
8367 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8369 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8370 || POINTER_BOUNDS_MODE_P (mode))
8371 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8372 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8373 return function_value_ms_64 (orig_mode, mode, valtype);
8374 else if (TARGET_64BIT)
8375 return function_value_64 (orig_mode, mode, valtype);
8376 else
8377 return function_value_32 (orig_mode, mode, fntype, fn);
8380 static rtx
8381 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8383 machine_mode mode, orig_mode;
8385 orig_mode = TYPE_MODE (valtype);
8386 mode = type_natural_mode (valtype, NULL, true);
8387 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8390 /* Return an RTX representing a place where a function returns
8391 or receives pointer bounds, or NULL if no bounds are returned.
8393 VALTYPE is a data type of a value returned by the function.
8395 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8396 or FUNCTION_TYPE of the function.
8398 If OUTGOING is false, return a place in which the caller will
8399 see the return value. Otherwise, return a place where a
8400 function returns a value. */
8402 static rtx
8403 ix86_function_value_bounds (const_tree valtype,
8404 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8405 bool outgoing ATTRIBUTE_UNUSED)
8407 rtx res = NULL_RTX;
8409 if (BOUNDED_TYPE_P (valtype))
8410 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8411 else if (chkp_type_has_pointer (valtype))
8413 bitmap slots;
8414 rtx bounds[2];
8415 bitmap_iterator bi;
8416 unsigned i, bnd_no = 0;
8418 bitmap_obstack_initialize (NULL);
8419 slots = BITMAP_ALLOC (NULL);
8420 chkp_find_bound_slots (valtype, slots);
8422 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8424 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8425 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8426 gcc_assert (bnd_no < 2);
8427 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8430 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8432 BITMAP_FREE (slots);
8433 bitmap_obstack_release (NULL);
8435 else
8436 res = NULL_RTX;
8438 return res;
8441 /* Pointer function arguments and return values are promoted to
8442 word_mode. */
8444 static machine_mode
8445 ix86_promote_function_mode (const_tree type, machine_mode mode,
8446 int *punsignedp, const_tree fntype,
8447 int for_return)
8449 if (type != NULL_TREE && POINTER_TYPE_P (type))
8451 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8452 return word_mode;
8454 return default_promote_function_mode (type, mode, punsignedp, fntype,
8455 for_return);
8458 /* Return true if a structure, union or array with MODE containing FIELD
8459 should be accessed using BLKmode. */
8461 static bool
8462 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8464 /* Union with XFmode must be in BLKmode. */
8465 return (mode == XFmode
8466 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8467 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8470 static rtx
8471 ix86_libcall_value (machine_mode mode)
8473 return ix86_function_value_1 (NULL, NULL, mode, mode);
8476 /* Return true iff type is returned in memory. */
8478 static bool
8479 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8481 #ifdef SUBTARGET_RETURN_IN_MEMORY
8482 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8483 #else
8484 const machine_mode mode = type_natural_mode (type, NULL, true);
8485 HOST_WIDE_INT size;
8487 if (POINTER_BOUNDS_TYPE_P (type))
8488 return false;
8490 if (TARGET_64BIT)
8492 if (ix86_function_type_abi (fntype) == MS_ABI)
8494 size = int_size_in_bytes (type);
8496 /* __m128 is returned in xmm0. */
8497 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8498 || INTEGRAL_TYPE_P (type)
8499 || VECTOR_FLOAT_TYPE_P (type))
8500 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8501 && !COMPLEX_MODE_P (mode)
8502 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8503 return false;
8505 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8506 return size != 1 && size != 2 && size != 4 && size != 8;
8508 else
8510 int needed_intregs, needed_sseregs;
8512 return examine_argument (mode, type, 1,
8513 &needed_intregs, &needed_sseregs);
8516 else
8518 if (mode == BLKmode)
8519 return true;
8521 size = int_size_in_bytes (type);
8523 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8524 return false;
8526 if (VECTOR_MODE_P (mode) || mode == TImode)
8528 /* User-created vectors small enough to fit in EAX. */
8529 if (size < 8)
8530 return false;
8532 /* Unless the ABI prescribes otherwise,
8533 MMX/3dNow values are returned in MM0 if available. */
8535 if (size == 8)
8536 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8538 /* SSE values are returned in XMM0 if available. */
8539 if (size == 16)
8540 return !TARGET_SSE;
8542 /* AVX values are returned in YMM0 if available. */
8543 if (size == 32)
8544 return !TARGET_AVX;
8546 /* AVX512F values are returned in ZMM0 if available. */
8547 if (size == 64)
8548 return !TARGET_AVX512F;
8551 if (mode == XFmode)
8552 return false;
8554 if (size > 12)
8555 return true;
8557 /* OImode shouldn't be used directly. */
8558 gcc_assert (mode != OImode);
8560 return false;
8562 #endif
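/* As a concrete reading of the MS ABI branch above: a struct of two
   doubles (size 16, neither an integral nor a vector type) is returned in
   memory, whereas a __m128 value or a plain 8-byte integer is returned in
   a register.  */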
8566 /* Create the va_list data type. */
8568 /* Returns the calling convention specific va_list data type.
8569 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8571 static tree
8572 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8574 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8576 /* For i386 we use plain pointer to argument area. */
8577 if (!TARGET_64BIT || abi == MS_ABI)
8578 return build_pointer_type (char_type_node);
8580 record = lang_hooks.types.make_type (RECORD_TYPE);
8581 type_decl = build_decl (BUILTINS_LOCATION,
8582 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8584 f_gpr = build_decl (BUILTINS_LOCATION,
8585 FIELD_DECL, get_identifier ("gp_offset"),
8586 unsigned_type_node);
8587 f_fpr = build_decl (BUILTINS_LOCATION,
8588 FIELD_DECL, get_identifier ("fp_offset"),
8589 unsigned_type_node);
8590 f_ovf = build_decl (BUILTINS_LOCATION,
8591 FIELD_DECL, get_identifier ("overflow_arg_area"),
8592 ptr_type_node);
8593 f_sav = build_decl (BUILTINS_LOCATION,
8594 FIELD_DECL, get_identifier ("reg_save_area"),
8595 ptr_type_node);
8597 va_list_gpr_counter_field = f_gpr;
8598 va_list_fpr_counter_field = f_fpr;
8600 DECL_FIELD_CONTEXT (f_gpr) = record;
8601 DECL_FIELD_CONTEXT (f_fpr) = record;
8602 DECL_FIELD_CONTEXT (f_ovf) = record;
8603 DECL_FIELD_CONTEXT (f_sav) = record;
8605 TYPE_STUB_DECL (record) = type_decl;
8606 TYPE_NAME (record) = type_decl;
8607 TYPE_FIELDS (record) = f_gpr;
8608 DECL_CHAIN (f_gpr) = f_fpr;
8609 DECL_CHAIN (f_fpr) = f_ovf;
8610 DECL_CHAIN (f_ovf) = f_sav;
8612 layout_type (record);
8614 /* The correct type is an array type of one element. */
8615 return build_array_type (record, build_index_type (size_zero_node));
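/* Seen from C, the 64-bit SysV record built above is roughly

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];

   i.e. a one-element array of the tag, which is why va_list values decay
   to a pointer when passed around.  */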
8618 /* Set up the builtin va_list data type and, for 64-bit, the additional
8619    calling-convention-specific va_list data types.  */
8621 static tree
8622 ix86_build_builtin_va_list (void)
8624 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8626 /* Initialize abi specific va_list builtin types. */
8627 if (TARGET_64BIT)
8629 tree t;
8630 if (ix86_abi == MS_ABI)
8632 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8633 if (TREE_CODE (t) != RECORD_TYPE)
8634 t = build_variant_type_copy (t);
8635 sysv_va_list_type_node = t;
8637 else
8639 t = ret;
8640 if (TREE_CODE (t) != RECORD_TYPE)
8641 t = build_variant_type_copy (t);
8642 sysv_va_list_type_node = t;
8644 if (ix86_abi != MS_ABI)
8646 t = ix86_build_builtin_va_list_abi (MS_ABI);
8647 if (TREE_CODE (t) != RECORD_TYPE)
8648 t = build_variant_type_copy (t);
8649 ms_va_list_type_node = t;
8651 else
8653 t = ret;
8654 if (TREE_CODE (t) != RECORD_TYPE)
8655 t = build_variant_type_copy (t);
8656 ms_va_list_type_node = t;
8660 return ret;
8663 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8665 static void
8666 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8668 rtx save_area, mem;
8669 alias_set_type set;
8670 int i, max;
8672 /* GPR size of varargs save area. */
8673 if (cfun->va_list_gpr_size)
8674 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8675 else
8676 ix86_varargs_gpr_size = 0;
8678 /* FPR size of varargs save area. We don't need it if we don't pass
8679 anything in SSE registers. */
8680 if (TARGET_SSE && cfun->va_list_fpr_size)
8681 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8682 else
8683 ix86_varargs_fpr_size = 0;
8685 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8686 return;
8688 save_area = frame_pointer_rtx;
8689 set = get_varargs_alias_set ();
8691 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8692 if (max > X86_64_REGPARM_MAX)
8693 max = X86_64_REGPARM_MAX;
8695 for (i = cum->regno; i < max; i++)
8697 mem = gen_rtx_MEM (word_mode,
8698 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8699 MEM_NOTRAP_P (mem) = 1;
8700 set_mem_alias_set (mem, set);
8701 emit_move_insn (mem,
8702 gen_rtx_REG (word_mode,
8703 x86_64_int_parameter_registers[i]));
8706 if (ix86_varargs_fpr_size)
8708 machine_mode smode;
8709 rtx_code_label *label;
8710 rtx test;
8712 /* Now emit code to save SSE registers. The AX parameter contains number
8713 of SSE parameter registers used to call this function, though all we
8714 actually check here is the zero/non-zero status. */
8716 label = gen_label_rtx ();
8717 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8718 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8719 label));
8721 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8722 we used movdqa (i.e. TImode) instead? Perhaps even better would
8723 be if we could determine the real mode of the data, via a hook
8724 into pass_stdarg. Ignore all that for now. */
8725 smode = V4SFmode;
8726 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8727 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8729 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8730 if (max > X86_64_SSE_REGPARM_MAX)
8731 max = X86_64_SSE_REGPARM_MAX;
8733 for (i = cum->sse_regno; i < max; ++i)
8735 mem = plus_constant (Pmode, save_area,
8736 i * 16 + ix86_varargs_gpr_size);
8737 mem = gen_rtx_MEM (smode, mem);
8738 MEM_NOTRAP_P (mem) = 1;
8739 set_mem_alias_set (mem, set);
8740 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8742 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8745 emit_label (label);
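/* The save area written above is addressed relative to frame_pointer_rtx:
   one word per integer parameter register first (ix86_varargs_gpr_size
   bytes in total), then 16 bytes per SSE parameter register starting at
   offset ix86_varargs_gpr_size.  The gp_offset and fp_offset fields set
   up by ix86_va_start below index into this block.  */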
8749 static void
8750 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8752 alias_set_type set = get_varargs_alias_set ();
8753 int i;
8755   /* Reset to zero, as there might be a SysV va_arg used
8756      before.  */
8757 ix86_varargs_gpr_size = 0;
8758 ix86_varargs_fpr_size = 0;
8760 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8762 rtx reg, mem;
8764 mem = gen_rtx_MEM (Pmode,
8765 plus_constant (Pmode, virtual_incoming_args_rtx,
8766 i * UNITS_PER_WORD));
8767 MEM_NOTRAP_P (mem) = 1;
8768 set_mem_alias_set (mem, set);
8770 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8771 emit_move_insn (mem, reg);
8775 static void
8776 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8777 tree type, int *, int no_rtl)
8779 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8780 CUMULATIVE_ARGS next_cum;
8781 tree fntype;
8783 /* This argument doesn't appear to be used anymore. Which is good,
8784 because the old code here didn't suppress rtl generation. */
8785 gcc_assert (!no_rtl);
8787 if (!TARGET_64BIT)
8788 return;
8790 fntype = TREE_TYPE (current_function_decl);
8792 /* For varargs, we do not want to skip the dummy va_dcl argument.
8793 For stdargs, we do want to skip the last named argument. */
8794 next_cum = *cum;
8795 if (stdarg_p (fntype))
8796 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8797 true);
8799 if (cum->call_abi == MS_ABI)
8800 setup_incoming_varargs_ms_64 (&next_cum);
8801 else
8802 setup_incoming_varargs_64 (&next_cum);
8805 static void
8806 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8807 enum machine_mode mode,
8808 tree type,
8809 int *pretend_size ATTRIBUTE_UNUSED,
8810 int no_rtl)
8812 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8813 CUMULATIVE_ARGS next_cum;
8814 tree fntype;
8815 rtx save_area;
8816 int bnd_reg, i, max;
8818 gcc_assert (!no_rtl);
8820 /* Do nothing if we use plain pointer to argument area. */
8821 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8822 return;
8824 fntype = TREE_TYPE (current_function_decl);
8826 /* For varargs, we do not want to skip the dummy va_dcl argument.
8827 For stdargs, we do want to skip the last named argument. */
8828 next_cum = *cum;
8829 if (stdarg_p (fntype))
8830 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8831 true);
8832 save_area = frame_pointer_rtx;
8834 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8835 if (max > X86_64_REGPARM_MAX)
8836 max = X86_64_REGPARM_MAX;
8838 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8839 if (chkp_function_instrumented_p (current_function_decl))
8840 for (i = cum->regno; i < max; i++)
8842 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8843 rtx reg = gen_rtx_REG (DImode,
8844 x86_64_int_parameter_registers[i]);
8845 rtx ptr = reg;
8846 rtx bounds;
8848 if (bnd_reg <= LAST_BND_REG)
8849 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8850 else
8852 rtx ldx_addr =
8853 plus_constant (Pmode, arg_pointer_rtx,
8854 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8855 bounds = gen_reg_rtx (BNDmode);
8856 emit_insn (BNDmode == BND64mode
8857 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8858 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8861 emit_insn (BNDmode == BND64mode
8862 ? gen_bnd64_stx (addr, ptr, bounds)
8863 : gen_bnd32_stx (addr, ptr, bounds));
8865 bnd_reg++;
8870 /* Check whether TYPE is a va_list type that is just a plain char pointer.  */
8872 static bool
8873 is_va_list_char_pointer (tree type)
8875 tree canonic;
8877 /* For 32-bit it is always true. */
8878 if (!TARGET_64BIT)
8879 return true;
8880 canonic = ix86_canonical_va_list_type (type);
8881 return (canonic == ms_va_list_type_node
8882 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8885 /* Implement va_start. */
8887 static void
8888 ix86_va_start (tree valist, rtx nextarg)
8890 HOST_WIDE_INT words, n_gpr, n_fpr;
8891 tree f_gpr, f_fpr, f_ovf, f_sav;
8892 tree gpr, fpr, ovf, sav, t;
8893 tree type;
8894 rtx ovf_rtx;
8896 if (flag_split_stack
8897 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8899 unsigned int scratch_regno;
8901 /* When we are splitting the stack, we can't refer to the stack
8902 arguments using internal_arg_pointer, because they may be on
8903 the old stack. The split stack prologue will arrange to
8904 leave a pointer to the old stack arguments in a scratch
8905 register, which we here copy to a pseudo-register. The split
8906 stack prologue can't set the pseudo-register directly because
8907 it (the prologue) runs before any registers have been saved. */
8909 scratch_regno = split_stack_prologue_scratch_regno ();
8910 if (scratch_regno != INVALID_REGNUM)
8912 rtx reg;
8913 rtx_insn *seq;
8915 reg = gen_reg_rtx (Pmode);
8916 cfun->machine->split_stack_varargs_pointer = reg;
8918 start_sequence ();
8919 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8920 seq = get_insns ();
8921 end_sequence ();
8923 push_topmost_sequence ();
8924 emit_insn_after (seq, entry_of_function ());
8925 pop_topmost_sequence ();
8929   /* Only the 64-bit target needs something special.  */
8930 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8932 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8933 std_expand_builtin_va_start (valist, nextarg);
8934 else
8936 rtx va_r, next;
8938 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8939 next = expand_binop (ptr_mode, add_optab,
8940 cfun->machine->split_stack_varargs_pointer,
8941 crtl->args.arg_offset_rtx,
8942 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8943 convert_move (va_r, next, 0);
8945 /* Store zero bounds for va_list. */
8946 if (chkp_function_instrumented_p (current_function_decl))
8947 chkp_expand_bounds_reset_for_mem (valist,
8948 make_tree (TREE_TYPE (valist),
8949 next));
8952 return;
8955 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8956 f_fpr = DECL_CHAIN (f_gpr);
8957 f_ovf = DECL_CHAIN (f_fpr);
8958 f_sav = DECL_CHAIN (f_ovf);
8960 valist = build_simple_mem_ref (valist);
8961 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8962 /* The following should be folded into the MEM_REF offset. */
8963 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8964 f_gpr, NULL_TREE);
8965 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8966 f_fpr, NULL_TREE);
8967 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8968 f_ovf, NULL_TREE);
8969 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8970 f_sav, NULL_TREE);
8972 /* Count number of gp and fp argument registers used. */
8973 words = crtl->args.info.words;
8974 n_gpr = crtl->args.info.regno;
8975 n_fpr = crtl->args.info.sse_regno;
8977 if (cfun->va_list_gpr_size)
8979 type = TREE_TYPE (gpr);
8980 t = build2 (MODIFY_EXPR, type,
8981 gpr, build_int_cst (type, n_gpr * 8));
8982 TREE_SIDE_EFFECTS (t) = 1;
8983 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8986 if (TARGET_SSE && cfun->va_list_fpr_size)
8988 type = TREE_TYPE (fpr);
8989 t = build2 (MODIFY_EXPR, type, fpr,
8990 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8991 TREE_SIDE_EFFECTS (t) = 1;
8992 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8995 /* Find the overflow area. */
8996 type = TREE_TYPE (ovf);
8997 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8998 ovf_rtx = crtl->args.internal_arg_pointer;
8999 else
9000 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
9001 t = make_tree (type, ovf_rtx);
9002 if (words != 0)
9003 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
9005 /* Store zero bounds for overflow area pointer. */
9006 if (chkp_function_instrumented_p (current_function_decl))
9007 chkp_expand_bounds_reset_for_mem (ovf, t);
9009 t = build2 (MODIFY_EXPR, type, ovf, t);
9010 TREE_SIDE_EFFECTS (t) = 1;
9011 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9013 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
9015       /* Find the register save area.
9016 	 The function prologue saves it right above the stack frame.  */
9017 type = TREE_TYPE (sav);
9018 t = make_tree (type, frame_pointer_rtx);
9019 if (!ix86_varargs_gpr_size)
9020 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
9022 /* Store zero bounds for save area pointer. */
9023 if (chkp_function_instrumented_p (current_function_decl))
9024 chkp_expand_bounds_reset_for_mem (sav, t);
9026 t = build2 (MODIFY_EXPR, type, sav, t);
9027 TREE_SIDE_EFFECTS (t) = 1;
9028 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
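/* For the 64-bit SysV case the expansion above is roughly equivalent to

     ap->gp_offset = n_gpr * 8;
     ap->fp_offset = X86_64_REGPARM_MAX * 8 + n_fpr * 16;
     ap->overflow_arg_area = incoming argument pointer + words * UNITS_PER_WORD;
     ap->reg_save_area = register save area (biased down by
			 8 * X86_64_REGPARM_MAX when no GPRs were saved, so
			 that fp_offset still indexes correctly);

   built as trees and expanded on the spot.  */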
9032 /* Implement va_arg. */
9034 static tree
9035 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9036 gimple_seq *post_p)
9038 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9039 tree f_gpr, f_fpr, f_ovf, f_sav;
9040 tree gpr, fpr, ovf, sav, t;
9041 int size, rsize;
9042 tree lab_false, lab_over = NULL_TREE;
9043 tree addr, t2;
9044 rtx container;
9045 int indirect_p = 0;
9046 tree ptrtype;
9047 machine_mode nat_mode;
9048 unsigned int arg_boundary;
9050   /* Only the 64-bit target needs something special.  */
9051 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
9052 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
9054 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9055 f_fpr = DECL_CHAIN (f_gpr);
9056 f_ovf = DECL_CHAIN (f_fpr);
9057 f_sav = DECL_CHAIN (f_ovf);
9059 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9060 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
9061 valist = build_va_arg_indirect_ref (valist);
9062 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9063 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9064 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9066 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9067 if (indirect_p)
9068 type = build_pointer_type (type);
9069 size = int_size_in_bytes (type);
9070 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
9072 nat_mode = type_natural_mode (type, NULL, false);
9073 switch (nat_mode)
9075 case V8SFmode:
9076 case V8SImode:
9077 case V32QImode:
9078 case V16HImode:
9079 case V4DFmode:
9080 case V4DImode:
9081 case V16SFmode:
9082 case V16SImode:
9083 case V64QImode:
9084 case V32HImode:
9085 case V8DFmode:
9086 case V8DImode:
9087       /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack.  */
9088 if (!TARGET_64BIT_MS_ABI)
9090 container = NULL;
9091 break;
9094 default:
9095 container = construct_container (nat_mode, TYPE_MODE (type),
9096 type, 0, X86_64_REGPARM_MAX,
9097 X86_64_SSE_REGPARM_MAX, intreg,
9099 break;
9102 /* Pull the value out of the saved registers. */
9104 addr = create_tmp_var (ptr_type_node, "addr");
9106 if (container)
9108 int needed_intregs, needed_sseregs;
9109 bool need_temp;
9110 tree int_addr, sse_addr;
9112 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9113 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9115 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9117 need_temp = (!REG_P (container)
9118 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9119 || TYPE_ALIGN (type) > 128));
9121       /* If we are passing a structure, verify that it is a consecutive block
9122 	 in the register save area.  If not, we need to do moves.  */
9123 if (!need_temp && !REG_P (container))
9125 /* Verify that all registers are strictly consecutive */
9126 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9128 int i;
9130 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9132 rtx slot = XVECEXP (container, 0, i);
9133 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9134 || INTVAL (XEXP (slot, 1)) != i * 16)
9135 need_temp = true;
9138 else
9140 int i;
9142 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9144 rtx slot = XVECEXP (container, 0, i);
9145 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9146 || INTVAL (XEXP (slot, 1)) != i * 8)
9147 need_temp = true;
9151 if (!need_temp)
9153 int_addr = addr;
9154 sse_addr = addr;
9156 else
9158 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9159 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9162 /* First ensure that we fit completely in registers. */
9163 if (needed_intregs)
9165 t = build_int_cst (TREE_TYPE (gpr),
9166 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9167 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9168 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9169 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9170 gimplify_and_add (t, pre_p);
9172 if (needed_sseregs)
9174 t = build_int_cst (TREE_TYPE (fpr),
9175 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9176 + X86_64_REGPARM_MAX * 8);
9177 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9178 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9179 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9180 gimplify_and_add (t, pre_p);
9183 /* Compute index to start of area used for integer regs. */
9184 if (needed_intregs)
9186 /* int_addr = gpr + sav; */
9187 t = fold_build_pointer_plus (sav, gpr);
9188 gimplify_assign (int_addr, t, pre_p);
9190 if (needed_sseregs)
9192 /* sse_addr = fpr + sav; */
9193 t = fold_build_pointer_plus (sav, fpr);
9194 gimplify_assign (sse_addr, t, pre_p);
9196 if (need_temp)
9198 int i, prev_size = 0;
9199 tree temp = create_tmp_var (type, "va_arg_tmp");
9201 /* addr = &temp; */
9202 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9203 gimplify_assign (addr, t, pre_p);
9205 for (i = 0; i < XVECLEN (container, 0); i++)
9207 rtx slot = XVECEXP (container, 0, i);
9208 rtx reg = XEXP (slot, 0);
9209 machine_mode mode = GET_MODE (reg);
9210 tree piece_type;
9211 tree addr_type;
9212 tree daddr_type;
9213 tree src_addr, src;
9214 int src_offset;
9215 tree dest_addr, dest;
9216 int cur_size = GET_MODE_SIZE (mode);
9218 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9219 prev_size = INTVAL (XEXP (slot, 1));
9220 if (prev_size + cur_size > size)
9222 cur_size = size - prev_size;
9223 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9224 if (mode == BLKmode)
9225 mode = QImode;
9227 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9228 if (mode == GET_MODE (reg))
9229 addr_type = build_pointer_type (piece_type);
9230 else
9231 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9232 true);
9233 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9234 true);
9236 if (SSE_REGNO_P (REGNO (reg)))
9238 src_addr = sse_addr;
9239 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9241 else
9243 src_addr = int_addr;
9244 src_offset = REGNO (reg) * 8;
9246 src_addr = fold_convert (addr_type, src_addr);
9247 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9249 dest_addr = fold_convert (daddr_type, addr);
9250 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9251 if (cur_size == GET_MODE_SIZE (mode))
9253 src = build_va_arg_indirect_ref (src_addr);
9254 dest = build_va_arg_indirect_ref (dest_addr);
9256 gimplify_assign (dest, src, pre_p);
9258 else
9260 tree copy
9261 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9262 3, dest_addr, src_addr,
9263 size_int (cur_size));
9264 gimplify_and_add (copy, pre_p);
9266 prev_size += cur_size;
9270 if (needed_intregs)
9272 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9273 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9274 gimplify_assign (gpr, t, pre_p);
9277 if (needed_sseregs)
9279 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9280 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9281 gimplify_assign (fpr, t, pre_p);
9284 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9286 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9289 /* ... otherwise out of the overflow area. */
9291   /* When we align a parameter on the stack for the caller, if its
9292      alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
9293      aligned to MAX_SUPPORTED_STACK_ALIGNMENT.  We will match the callee
9294      with the caller here.  */
9295 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9296 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9297 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9299 /* Care for on-stack alignment if needed. */
9300 if (arg_boundary <= 64 || size == 0)
9301 t = ovf;
9302 else
9304 HOST_WIDE_INT align = arg_boundary / 8;
9305 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9306 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9307 build_int_cst (TREE_TYPE (t), -align));
9310 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9311 gimplify_assign (addr, t, pre_p);
9313 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9314 gimplify_assign (unshare_expr (ovf), t, pre_p);
9316 if (container)
9317 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9319 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9320 addr = fold_convert (ptrtype, addr);
9322 if (indirect_p)
9323 addr = build_va_arg_indirect_ref (addr);
9324 return build_va_arg_indirect_ref (addr);
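/* The GIMPLE emitted above behaves roughly like

     if (gp_offset > 48 - needed_intregs * 8
	 || fp_offset > 176 - needed_sseregs * 16)
       goto overflow;
     addr = reg_save_area + gp_offset (or fp_offset);
     gp_offset += needed_intregs * 8;
     fp_offset += needed_sseregs * 16;
     goto done;
   overflow:
     addr = overflow_arg_area, aligned if the argument boundary exceeds 64 bits;
     overflow_arg_area = addr + rounded size;
   done:
     result = *(TYPE *) addr;

   where 48 and 176 correspond to the usual 6 integer and 8 SSE parameter
   registers; the code above uses X86_64_REGPARM_MAX and
   X86_64_SSE_REGPARM_MAX rather than hard-coding them.  */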
9327 /* Return true if OPNUM's MEM should be matched
9328 in movabs* patterns. */
9330 bool
9331 ix86_check_movabs (rtx insn, int opnum)
9333 rtx set, mem;
9335 set = PATTERN (insn);
9336 if (GET_CODE (set) == PARALLEL)
9337 set = XVECEXP (set, 0, 0);
9338 gcc_assert (GET_CODE (set) == SET);
9339 mem = XEXP (set, opnum);
9340 while (GET_CODE (mem) == SUBREG)
9341 mem = SUBREG_REG (mem);
9342 gcc_assert (MEM_P (mem));
9343 return volatile_ok || !MEM_VOLATILE_P (mem);
9346 /* Initialize the table of extra 80387 mathematical constants. */
9348 static void
9349 init_ext_80387_constants (void)
9351 static const char * cst[5] =
9353 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9354 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9355 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9356 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9357 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9359 int i;
9361 for (i = 0; i < 5; i++)
9363 real_from_string (&ext_80387_constants_table[i], cst[i]);
9364 /* Ensure each constant is rounded to XFmode precision. */
9365 real_convert (&ext_80387_constants_table[i],
9366 XFmode, &ext_80387_constants_table[i]);
9369 ext_80387_constants_init = 1;
9372 /* Return non-zero if the constant is something that
9373 can be loaded with a special instruction. */
9376 standard_80387_constant_p (rtx x)
9378 machine_mode mode = GET_MODE (x);
9380 REAL_VALUE_TYPE r;
9382 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
9383 return -1;
9385 if (x == CONST0_RTX (mode))
9386 return 1;
9387 if (x == CONST1_RTX (mode))
9388 return 2;
9390 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9392 /* For XFmode constants, try to find a special 80387 instruction when
9393 optimizing for size or on those CPUs that benefit from them. */
9394 if (mode == XFmode
9395 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9397 int i;
9399 if (! ext_80387_constants_init)
9400 init_ext_80387_constants ();
9402 for (i = 0; i < 5; i++)
9403 if (real_identical (&r, &ext_80387_constants_table[i]))
9404 return i + 3;
9407   /* A load of the constant -0.0 or -1.0 will be split into an
9408      fldz;fchs or fld1;fchs sequence.  */
9409 if (real_isnegzero (&r))
9410 return 8;
9411 if (real_identical (&r, &dconstm1))
9412 return 9;
9414 return 0;
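/* The code returned here is decoded by standard_80387_constant_opcode
   below: 1 is fldz, 2 is fld1, 3..7 are the extended constants in table
   order (fldlg2, fldln2, fldl2e, fldl2t, fldpi), and 8/9 stand for
   -0.0/-1.0, which are later split into fldz/fld1 followed by fchs.  */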
9417 /* Return the opcode of the special instruction to be used to load
9418 the constant X. */
9420 const char *
9421 standard_80387_constant_opcode (rtx x)
9423 switch (standard_80387_constant_p (x))
9425 case 1:
9426 return "fldz";
9427 case 2:
9428 return "fld1";
9429 case 3:
9430 return "fldlg2";
9431 case 4:
9432 return "fldln2";
9433 case 5:
9434 return "fldl2e";
9435 case 6:
9436 return "fldl2t";
9437 case 7:
9438 return "fldpi";
9439 case 8:
9440 case 9:
9441 return "#";
9442 default:
9443 gcc_unreachable ();
9447 /* Return the CONST_DOUBLE representing the 80387 constant that is
9448 loaded by the specified special instruction. The argument IDX
9449 matches the return value from standard_80387_constant_p. */
9452 standard_80387_constant_rtx (int idx)
9454 int i;
9456 if (! ext_80387_constants_init)
9457 init_ext_80387_constants ();
9459 switch (idx)
9461 case 3:
9462 case 4:
9463 case 5:
9464 case 6:
9465 case 7:
9466 i = idx - 3;
9467 break;
9469 default:
9470 gcc_unreachable ();
9473 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9474 XFmode);
9477 /* Return 1 if X is all 0s and 2 if X is all 1s
9478    in a supported SSE/AVX vector mode.  */
9481 standard_sse_constant_p (rtx x)
9483 machine_mode mode;
9485 if (!TARGET_SSE)
9486 return 0;
9488 mode = GET_MODE (x);
9490 if (x == const0_rtx || x == CONST0_RTX (mode))
9491 return 1;
9492 if (vector_all_ones_operand (x, mode))
9493 switch (mode)
9495 case V16QImode:
9496 case V8HImode:
9497 case V4SImode:
9498 case V2DImode:
9499 if (TARGET_SSE2)
9500 return 2;
9501 case V32QImode:
9502 case V16HImode:
9503 case V8SImode:
9504 case V4DImode:
9505 if (TARGET_AVX2)
9506 return 2;
9507 case V64QImode:
9508 case V32HImode:
9509 case V16SImode:
9510 case V8DImode:
9511 if (TARGET_AVX512F)
9512 return 2;
9513 default:
9514 break;
9517 return 0;
9520 /* Return the opcode of the special instruction to be used to load
9521 the constant X. */
9523 const char *
9524 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9526 switch (standard_sse_constant_p (x))
9528 case 1:
9529 switch (get_attr_mode (insn))
9531 case MODE_XI:
9532 return "vpxord\t%g0, %g0, %g0";
9533 case MODE_V16SF:
9534 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9535 : "vpxord\t%g0, %g0, %g0";
9536 case MODE_V8DF:
9537 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9538 : "vpxorq\t%g0, %g0, %g0";
9539 case MODE_TI:
9540 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9541 : "%vpxor\t%0, %d0";
9542 case MODE_V2DF:
9543 return "%vxorpd\t%0, %d0";
9544 case MODE_V4SF:
9545 return "%vxorps\t%0, %d0";
9547 case MODE_OI:
9548 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9549 : "vpxor\t%x0, %x0, %x0";
9550 case MODE_V4DF:
9551 return "vxorpd\t%x0, %x0, %x0";
9552 case MODE_V8SF:
9553 return "vxorps\t%x0, %x0, %x0";
9555 default:
9556 break;
9559 case 2:
9560 if (TARGET_AVX512VL
9561 || get_attr_mode (insn) == MODE_XI
9562 || get_attr_mode (insn) == MODE_V8DF
9563 || get_attr_mode (insn) == MODE_V16SF)
9564 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9565 if (TARGET_AVX)
9566 return "vpcmpeqd\t%0, %0, %0";
9567 else
9568 return "pcmpeqd\t%0, %0";
9570 default:
9571 break;
9573 gcc_unreachable ();
9576 /* Return true if OP contains a symbol reference.  */
9578 bool
9579 symbolic_reference_mentioned_p (rtx op)
9581 const char *fmt;
9582 int i;
9584 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9585 return true;
9587 fmt = GET_RTX_FORMAT (GET_CODE (op));
9588 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9590 if (fmt[i] == 'E')
9592 int j;
9594 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9595 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9596 return true;
9599 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9600 return true;
9603 return false;
9606 /* Return true if it is appropriate to emit `ret' instructions in the
9607 body of a function. Do this only if the epilogue is simple, needing a
9608 couple of insns. Prior to reloading, we can't tell how many registers
9609 must be saved, so return false then. Return false if there is no frame
9610 marker to de-allocate. */
9612 bool
9613 ix86_can_use_return_insn_p (void)
9615 struct ix86_frame frame;
9617 if (! reload_completed || frame_pointer_needed)
9618 return 0;
9620 /* Don't allow more than 32k pop, since that's all we can do
9621 with one instruction. */
9622 if (crtl->args.pops_args && crtl->args.size >= 32768)
9623 return 0;
9625 ix86_compute_frame_layout (&frame);
9626 return (frame.stack_pointer_offset == UNITS_PER_WORD
9627 && (frame.nregs + frame.nsseregs) == 0);
9630 /* Value should be nonzero if functions must have frame pointers.
9631 Zero means the frame pointer need not be set up (and parms may
9632 be accessed via the stack pointer) in functions that seem suitable. */
9634 static bool
9635 ix86_frame_pointer_required (void)
9637 /* If we accessed previous frames, then the generated code expects
9638 to be able to access the saved ebp value in our frame. */
9639 if (cfun->machine->accesses_prev_frame)
9640 return true;
9642   /* Several x86 OSes need a frame pointer for other reasons,
9643      usually pertaining to setjmp.  */
9644 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9645 return true;
9647 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
9648 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9649 return true;
9651   /* With Win64 SEH, very large frames need a frame pointer since the
9652      maximum stack allocation is 4GB.  */
9653 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9654 return true;
9656 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9657 turns off the frame pointer by default. Turn it back on now if
9658 we've not got a leaf function. */
9659 if (TARGET_OMIT_LEAF_FRAME_POINTER
9660 && (!crtl->is_leaf
9661 || ix86_current_function_calls_tls_descriptor))
9662 return true;
9664 if (crtl->profile && !flag_fentry)
9665 return true;
9667 return false;
9670 /* Record that the current function accesses previous call frames. */
9672 void
9673 ix86_setup_frame_addresses (void)
9675 cfun->machine->accesses_prev_frame = 1;
9678 #ifndef USE_HIDDEN_LINKONCE
9679 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9680 # define USE_HIDDEN_LINKONCE 1
9681 # else
9682 # define USE_HIDDEN_LINKONCE 0
9683 # endif
9684 #endif
9686 static int pic_labels_used;
9688 /* Fills in the label name that should be used for a pc thunk for
9689 the given register. */
9691 static void
9692 get_pc_thunk_name (char name[32], unsigned int regno)
9694 gcc_assert (!TARGET_64BIT);
9696 if (USE_HIDDEN_LINKONCE)
9697 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9698 else
9699 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9703 /* This function generates code for -fpic that loads %ebx with
9704 the return address of the caller and then returns. */
9706 static void
9707 ix86_code_end (void)
9709 rtx xops[2];
9710 int regno;
9712 for (regno = AX_REG; regno <= SP_REG; regno++)
9714 char name[32];
9715 tree decl;
9717 if (!(pic_labels_used & (1 << regno)))
9718 continue;
9720 get_pc_thunk_name (name, regno);
9722 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9723 get_identifier (name),
9724 build_function_type_list (void_type_node, NULL_TREE));
9725 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9726 NULL_TREE, void_type_node);
9727 TREE_PUBLIC (decl) = 1;
9728 TREE_STATIC (decl) = 1;
9729 DECL_IGNORED_P (decl) = 1;
9731 #if TARGET_MACHO
9732 if (TARGET_MACHO)
9734 switch_to_section (darwin_sections[text_coal_section]);
9735 fputs ("\t.weak_definition\t", asm_out_file);
9736 assemble_name (asm_out_file, name);
9737 fputs ("\n\t.private_extern\t", asm_out_file);
9738 assemble_name (asm_out_file, name);
9739 putc ('\n', asm_out_file);
9740 ASM_OUTPUT_LABEL (asm_out_file, name);
9741 DECL_WEAK (decl) = 1;
9743 else
9744 #endif
9745 if (USE_HIDDEN_LINKONCE)
9747 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9749 targetm.asm_out.unique_section (decl, 0);
9750 switch_to_section (get_named_section (decl, NULL, 0));
9752 targetm.asm_out.globalize_label (asm_out_file, name);
9753 fputs ("\t.hidden\t", asm_out_file);
9754 assemble_name (asm_out_file, name);
9755 putc ('\n', asm_out_file);
9756 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9758 else
9760 switch_to_section (text_section);
9761 ASM_OUTPUT_LABEL (asm_out_file, name);
9764 DECL_INITIAL (decl) = make_node (BLOCK);
9765 current_function_decl = decl;
9766 init_function_start (decl);
9767 first_function_block_is_cold = false;
9768 /* Make sure unwind info is emitted for the thunk if needed. */
9769 final_start_function (emit_barrier (), asm_out_file, 1);
9771 /* Pad stack IP move with 4 instructions (two NOPs count
9772 as one instruction). */
9773 if (TARGET_PAD_SHORT_FUNCTION)
9775 int i = 8;
9777 while (i--)
9778 fputs ("\tnop\n", asm_out_file);
9781 xops[0] = gen_rtx_REG (Pmode, regno);
9782 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9783 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9784 output_asm_insn ("%!ret", NULL);
9785 final_end_function ();
9786 init_insn_lengths ();
9787 free_after_compilation (cfun);
9788 set_cfun (NULL);
9789 current_function_decl = NULL;
9792 if (flag_split_stack)
9793 file_end_indicate_split_stack ();
9796 /* Emit code for the SET_GOT patterns. */
9798 const char *
9799 output_set_got (rtx dest, rtx label)
9801 rtx xops[3];
9803 xops[0] = dest;
9805 if (TARGET_VXWORKS_RTP && flag_pic)
9807 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9808 xops[2] = gen_rtx_MEM (Pmode,
9809 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9810 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9812 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9813 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9814 an unadorned address. */
9815 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9816 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9817 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9818 return "";
9821 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9823 if (!flag_pic)
9825 if (TARGET_MACHO)
9826 /* We don't need a pic base, we're not producing pic. */
9827 gcc_unreachable ();
9829 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9830 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9831 targetm.asm_out.internal_label (asm_out_file, "L",
9832 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9834 else
9836 char name[32];
9837 get_pc_thunk_name (name, REGNO (dest));
9838 pic_labels_used |= 1 << REGNO (dest);
9840 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9841 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9842 output_asm_insn ("%!call\t%X2", xops);
9844 #if TARGET_MACHO
9845 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9846 This is what will be referenced by the Mach-O PIC subsystem. */
9847 if (machopic_should_output_picbase_label () || !label)
9848 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9850 /* When we are restoring the pic base at the site of a nonlocal label,
9851 and we decided to emit the pic base above, we will still output a
9852 local label used for calculating the correction offset (even though
9853 the offset will be 0 in that case). */
9854 if (label)
9855 targetm.asm_out.internal_label (asm_out_file, "L",
9856 CODE_LABEL_NUMBER (label));
9857 #endif
9860 if (!TARGET_MACHO)
9861 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9863 return "";
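/* For the usual 32-bit PIC case this emits, taking %ebx as an example,

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk produced by ix86_code_end above is simply

	movl	(%esp), %ebx
	ret  */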
9866 /* Generate a "push" pattern for input ARG.  */
9868 static rtx
9869 gen_push (rtx arg)
9871 struct machine_function *m = cfun->machine;
9873 if (m->fs.cfa_reg == stack_pointer_rtx)
9874 m->fs.cfa_offset += UNITS_PER_WORD;
9875 m->fs.sp_offset += UNITS_PER_WORD;
9877 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9878 arg = gen_rtx_REG (word_mode, REGNO (arg));
9880 return gen_rtx_SET (VOIDmode,
9881 gen_rtx_MEM (word_mode,
9882 gen_rtx_PRE_DEC (Pmode,
9883 stack_pointer_rtx)),
9884 arg);
9887 /* Generate a "pop" pattern for input ARG.  */
9889 static rtx
9890 gen_pop (rtx arg)
9892 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9893 arg = gen_rtx_REG (word_mode, REGNO (arg));
9895 return gen_rtx_SET (VOIDmode,
9896 arg,
9897 gen_rtx_MEM (word_mode,
9898 gen_rtx_POST_INC (Pmode,
9899 stack_pointer_rtx)));
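/* On a 64-bit target gen_push and gen_pop therefore produce the canonical

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI ...))
     (set (reg:DI ...) (mem:DI (post_inc:DI (reg:DI sp))))

   patterns matching the push/pop instructions, with gen_push also keeping
   the frame state in cfun->machine->fs up to date.  */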
9902 /* Return >= 0 if there is an unused call-clobbered register available
9903 for the entire function. */
9905 static unsigned int
9906 ix86_select_alt_pic_regnum (void)
9908 if (ix86_use_pseudo_pic_reg ())
9909 return INVALID_REGNUM;
9911 if (crtl->is_leaf
9912 && !crtl->profile
9913 && !ix86_current_function_calls_tls_descriptor)
9915 int i, drap;
9916 /* Can't use the same register for both PIC and DRAP. */
9917 if (crtl->drap_reg)
9918 drap = REGNO (crtl->drap_reg);
9919 else
9920 drap = -1;
9921 for (i = 2; i >= 0; --i)
9922 if (i != drap && !df_regs_ever_live_p (i))
9923 return i;
9926 return INVALID_REGNUM;
9929 /* Return TRUE if we need to save REGNO. */
9931 static bool
9932 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9934 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9935 && pic_offset_table_rtx)
9937 if (ix86_use_pseudo_pic_reg ())
9939 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
9940 _mcount in prologue. */
9941 if (!TARGET_64BIT && flag_pic && crtl->profile)
9942 return true;
9944 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9945 || crtl->profile
9946 || crtl->calls_eh_return
9947 || crtl->uses_const_pool
9948 || cfun->has_nonlocal_label)
9949 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9952 if (crtl->calls_eh_return && maybe_eh_return)
9954 unsigned i;
9955 for (i = 0; ; i++)
9957 unsigned test = EH_RETURN_DATA_REGNO (i);
9958 if (test == INVALID_REGNUM)
9959 break;
9960 if (test == regno)
9961 return true;
9965 if (crtl->drap_reg
9966 && regno == REGNO (crtl->drap_reg)
9967 && !cfun->machine->no_drap_save_restore)
9968 return true;
9970 return (df_regs_ever_live_p (regno)
9971 && !call_used_regs[regno]
9972 && !fixed_regs[regno]
9973 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9976 /* Return the number of saved general purpose registers.  */
9978 static int
9979 ix86_nsaved_regs (void)
9981 int nregs = 0;
9982 int regno;
9984 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9985 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9986 nregs ++;
9987 return nregs;
9990 /* Return the number of saved SSE registers.  */
9992 static int
9993 ix86_nsaved_sseregs (void)
9995 int nregs = 0;
9996 int regno;
9998 if (!TARGET_64BIT_MS_ABI)
9999 return 0;
10000 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10001 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10002 nregs ++;
10003 return nregs;
10006 /* Given FROM and TO register numbers, say whether this elimination is
10007 allowed. If stack alignment is needed, we can only replace argument
10008 pointer with hard frame pointer, or replace frame pointer with stack
10009 pointer. Otherwise, frame pointer elimination is automatically
10010 handled and all other eliminations are valid. */
10012 static bool
10013 ix86_can_eliminate (const int from, const int to)
10015 if (stack_realign_fp)
10016 return ((from == ARG_POINTER_REGNUM
10017 && to == HARD_FRAME_POINTER_REGNUM)
10018 || (from == FRAME_POINTER_REGNUM
10019 && to == STACK_POINTER_REGNUM));
10020 else
10021 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
10024 /* Return the offset between two registers, one to be eliminated, and the other
10025 its replacement, at the start of a routine. */
10027 HOST_WIDE_INT
10028 ix86_initial_elimination_offset (int from, int to)
10030 struct ix86_frame frame;
10031 ix86_compute_frame_layout (&frame);
10033 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10034 return frame.hard_frame_pointer_offset;
10035 else if (from == FRAME_POINTER_REGNUM
10036 && to == HARD_FRAME_POINTER_REGNUM)
10037 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
10038 else
10040 gcc_assert (to == STACK_POINTER_REGNUM);
10042 if (from == ARG_POINTER_REGNUM)
10043 return frame.stack_pointer_offset;
10045 gcc_assert (from == FRAME_POINTER_REGNUM);
10046 return frame.stack_pointer_offset - frame.frame_pointer_offset;
10050 /* In a dynamically-aligned function, we can't know the offset from
10051 stack pointer to frame pointer, so we must ensure that setjmp
10052 eliminates fp against the hard fp (%ebp) rather than trying to
10053 index from %esp up to the top of the frame across a gap that is
10054 of unknown (at compile-time) size. */
10055 static rtx
10056 ix86_builtin_setjmp_frame_value (void)
10058 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
10061 /* When using -fsplit-stack, the allocation routines set a field in
10062 the TCB to the bottom of the stack plus this much space, measured
10063 in bytes. */
10065 #define SPLIT_STACK_AVAILABLE 256
10067 /* Fill in the ix86_frame structure describing the frame of the current function.  */
10069 static void
10070 ix86_compute_frame_layout (struct ix86_frame *frame)
10072 unsigned HOST_WIDE_INT stack_alignment_needed;
10073 HOST_WIDE_INT offset;
10074 unsigned HOST_WIDE_INT preferred_alignment;
10075 HOST_WIDE_INT size = get_frame_size ();
10076 HOST_WIDE_INT to_allocate;
10078 frame->nregs = ix86_nsaved_regs ();
10079 frame->nsseregs = ix86_nsaved_sseregs ();
10081   /* The 64-bit MS ABI seems to require stack alignment to always be 16,
10082      except in function prologues and leaf functions.  */
10083 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
10084 && (!crtl->is_leaf || cfun->calls_alloca != 0
10085 || ix86_current_function_calls_tls_descriptor))
10087 crtl->preferred_stack_boundary = 128;
10088 crtl->stack_alignment_needed = 128;
10090   /* preferred_stack_boundary is never updated for calls
10091      expanded from the TLS descriptor.  Update it here.  We don't update it in
10092      the expand stage because, according to the comments before
10093      ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
10094      away.  */
10095 else if (ix86_current_function_calls_tls_descriptor
10096 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
10098 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
10099 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
10100 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
10103 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
10104 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
10106 gcc_assert (!size || stack_alignment_needed);
10107 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
10108 gcc_assert (preferred_alignment <= stack_alignment_needed);
10110 /* For SEH we have to limit the amount of code movement into the prologue.
10111 At present we do this via a BLOCKAGE, at which point there's very little
10112 scheduling that can be done, which means that there's very little point
10113 in doing anything except PUSHs. */
10114 if (TARGET_SEH)
10115 cfun->machine->use_fast_prologue_epilogue = false;
10117 /* During reload iteration the amount of registers saved can change.
10118 Recompute the value as needed. Do not recompute when amount of registers
10119 didn't change as reload does multiple calls to the function and does not
10120 expect the decision to change within single iteration. */
10121 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
10122 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
10124 int count = frame->nregs;
10125 struct cgraph_node *node = cgraph_node::get (current_function_decl);
10127 cfun->machine->use_fast_prologue_epilogue_nregs = count;
10129 /* The fast prologue uses move instead of push to save registers. This
10130 is significantly longer, but also executes faster as modern hardware
10131 can execute the moves in parallel, but can't do that for push/pop.
10133 Be careful about choosing what prologue to emit: When function takes
10134 many instructions to execute we may use slow version as well as in
10135 case function is known to be outside hot spot (this is known with
10136 feedback only). Weight the size of function by number of registers
10137 to save as it is cheap to use one or two push instructions but very
10138 slow to use many of them. */
10139 if (count)
10140 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10141 if (node->frequency < NODE_FREQUENCY_NORMAL
10142 || (flag_branch_probabilities
10143 && node->frequency < NODE_FREQUENCY_HOT))
10144 cfun->machine->use_fast_prologue_epilogue = false;
10145 else
10146 cfun->machine->use_fast_prologue_epilogue
10147 = !expensive_function_p (count);
10150 frame->save_regs_using_mov
10151 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10152 /* If static stack checking is enabled and done with probes,
10153 the registers need to be saved before allocating the frame. */
10154 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10156 /* Skip return address. */
10157 offset = UNITS_PER_WORD;
10159 /* Skip pushed static chain. */
10160 if (ix86_static_chain_on_stack)
10161 offset += UNITS_PER_WORD;
10163 /* Skip saved base pointer. */
10164 if (frame_pointer_needed)
10165 offset += UNITS_PER_WORD;
10166 frame->hfp_save_offset = offset;
10168 /* The traditional frame pointer location is at the top of the frame. */
10169 frame->hard_frame_pointer_offset = offset;
10171 /* Register save area */
10172 offset += frame->nregs * UNITS_PER_WORD;
10173 frame->reg_save_offset = offset;
10175 /* On SEH target, registers are pushed just before the frame pointer
10176 location. */
10177 if (TARGET_SEH)
10178 frame->hard_frame_pointer_offset = offset;
10180 /* Align and set SSE register save area. */
10181 if (frame->nsseregs)
10183 /* The only ABI that has saved SSE registers (Win64) also has a
10184 16-byte aligned default stack, and thus we don't need to be
10185 within the re-aligned local stack frame to save them. */
10186 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10187 offset = (offset + 16 - 1) & -16;
10188 offset += frame->nsseregs * 16;
10190 frame->sse_reg_save_offset = offset;
10192 /* The re-aligned stack starts here. Values before this point are not
10193 directly comparable with values below this point. In order to make
10194 sure that no value happens to be the same before and after, force
10195 the alignment computation below to add a non-zero value. */
10196 if (stack_realign_fp)
10197 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10199 /* Va-arg area */
10200 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10201 offset += frame->va_arg_size;
10203 /* Align start of frame for local function. */
10204 if (stack_realign_fp
10205 || offset != frame->sse_reg_save_offset
10206 || size != 0
10207 || !crtl->is_leaf
10208 || cfun->calls_alloca
10209 || ix86_current_function_calls_tls_descriptor)
10210 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10212 /* Frame pointer points here. */
10213 frame->frame_pointer_offset = offset;
10215 offset += size;
10217   /* Add the outgoing arguments area.  It can be skipped if we eliminated
10218      all the function calls as dead code.
10219      Skipping is however impossible when the function calls alloca, as the
10220      alloca expander assumes that the last crtl->outgoing_args_size bytes
10221      of the stack frame are unused.  */
10222 if (ACCUMULATE_OUTGOING_ARGS
10223 && (!crtl->is_leaf || cfun->calls_alloca
10224 || ix86_current_function_calls_tls_descriptor))
10226 offset += crtl->outgoing_args_size;
10227 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10229 else
10230 frame->outgoing_arguments_size = 0;
10232 /* Align stack boundary. Only needed if we're calling another function
10233 or using alloca. */
10234 if (!crtl->is_leaf || cfun->calls_alloca
10235 || ix86_current_function_calls_tls_descriptor)
10236 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10238 /* We've reached end of stack frame. */
10239 frame->stack_pointer_offset = offset;
10241   /* The size the prologue needs to allocate.  */
10242 to_allocate = offset - frame->sse_reg_save_offset;
10244 if ((!to_allocate && frame->nregs <= 1)
10245 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10246 frame->save_regs_using_mov = false;
10248 if (ix86_using_red_zone ()
10249 && crtl->sp_is_unchanging
10250 && crtl->is_leaf
10251 && !ix86_current_function_calls_tls_descriptor)
10253 frame->red_zone_size = to_allocate;
10254 if (frame->save_regs_using_mov)
10255 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10256 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10257 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10259 else
10260 frame->red_zone_size = 0;
10261 frame->stack_pointer_offset -= frame->red_zone_size;
10263 /* The SEH frame pointer location is near the bottom of the frame.
10264 This is enforced by the fact that the difference between the
10265 stack pointer and the frame pointer is limited to 240 bytes in
10266 the unwind data structure. */
10267 if (TARGET_SEH)
10269 HOST_WIDE_INT diff;
10271 /* If we can leave the frame pointer where it is, do so. Also, returns
10272 the establisher frame for __builtin_frame_address (0). */
10273 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10274 if (diff <= SEH_MAX_FRAME_SIZE
10275 && (diff > 240 || (diff & 15) != 0)
10276 && !crtl->accesses_prior_frames)
10278 /* Ideally we'd determine what portion of the local stack frame
10279 (within the constraint of the lowest 240) is most heavily used.
10280 But without that complication, simply bias the frame pointer
10281 by 128 bytes so as to maximize the amount of the local stack
10282 frame that is addressable with 8-bit offsets. */
10283 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
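/* The offsets computed above describe a frame that, going from higher to
   lower addresses, looks roughly like

	return address
	pushed static chain (if any)
	saved frame pointer (if needed)		hfp_save_offset
	GPR save area				reg_save_offset
	SSE save area (16-byte aligned)		sse_reg_save_offset
	va_arg register block
	local variables				frame_pointer_offset
	outgoing arguments			stack_pointer_offset

   with every *_offset measured downward from the CFA, and the red-zone
   and SEH adjustments applied at the end.  */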
10288 /* This is semi-inlined memory_address_length, but simplified
10289 since we know that we're always dealing with reg+offset, and
10290 to avoid having to create and discard all that rtl. */
10292 static inline int
10293 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10295 int len = 4;
10297 if (offset == 0)
10299 /* EBP and R13 cannot be encoded without an offset. */
10300 len = (regno == BP_REG || regno == R13_REG);
10302 else if (IN_RANGE (offset, -128, 127))
10303 len = 1;
10305 /* ESP and R12 must be encoded with a SIB byte. */
10306 if (regno == SP_REG || regno == R12_REG)
10307 len++;
10309 return len;
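/* For example, an offset of 0 from %rbx costs 0 extra bytes, from %rbp or
   %r13 it costs 1 (those registers always need a displacement byte),
   offsets in [-128, 127] cost 1, larger offsets cost 4, and %rsp or %r12
   as the base add one more byte for the mandatory SIB.  choose_baseaddr
   below uses these lengths to pick the cheapest valid base register.  */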
10312 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10313 The valid base registers are taken from CFUN->MACHINE->FS. */
10315 static rtx
10316 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10318 const struct machine_function *m = cfun->machine;
10319 rtx base_reg = NULL;
10320 HOST_WIDE_INT base_offset = 0;
10322 if (m->use_fast_prologue_epilogue)
10324 /* Choose the base register most likely to allow the most scheduling
10325 opportunities. Generally FP is valid throughout the function,
10326 while DRAP must be reloaded within the epilogue. But choose either
10327 over the SP due to increased encoding size. */
10329 if (m->fs.fp_valid)
10331 base_reg = hard_frame_pointer_rtx;
10332 base_offset = m->fs.fp_offset - cfa_offset;
10334 else if (m->fs.drap_valid)
10336 base_reg = crtl->drap_reg;
10337 base_offset = 0 - cfa_offset;
10339 else if (m->fs.sp_valid)
10341 base_reg = stack_pointer_rtx;
10342 base_offset = m->fs.sp_offset - cfa_offset;
10345 else
10347 HOST_WIDE_INT toffset;
10348 int len = 16, tlen;
10350 /* Choose the base register with the smallest address encoding.
10351 With a tie, choose FP > DRAP > SP. */
10352 if (m->fs.sp_valid)
10354 base_reg = stack_pointer_rtx;
10355 base_offset = m->fs.sp_offset - cfa_offset;
10356 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10358 if (m->fs.drap_valid)
10360 toffset = 0 - cfa_offset;
10361 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10362 if (tlen <= len)
10364 base_reg = crtl->drap_reg;
10365 base_offset = toffset;
10366 len = tlen;
10369 if (m->fs.fp_valid)
10371 toffset = m->fs.fp_offset - cfa_offset;
10372 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10373 if (tlen <= len)
10375 base_reg = hard_frame_pointer_rtx;
10376 base_offset = toffset;
10377 len = tlen;
10381 gcc_assert (base_reg != NULL);
10383 return plus_constant (Pmode, base_reg, base_offset);
10386 /* Emit code to save registers in the prologue. */
10388 static void
10389 ix86_emit_save_regs (void)
10391 unsigned int regno;
10392 rtx_insn *insn;
10394 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10395 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10397 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10398 RTX_FRAME_RELATED_P (insn) = 1;
10402 /* Emit a single register save at CFA - CFA_OFFSET. */
10404 static void
10405 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10406 HOST_WIDE_INT cfa_offset)
10408 struct machine_function *m = cfun->machine;
10409 rtx reg = gen_rtx_REG (mode, regno);
10410 rtx mem, addr, base, insn;
10412 addr = choose_baseaddr (cfa_offset);
10413 mem = gen_frame_mem (mode, addr);
10415 /* For SSE saves, we need to indicate the 128-bit alignment. */
10416 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10418 insn = emit_move_insn (mem, reg);
10419 RTX_FRAME_RELATED_P (insn) = 1;
10421 base = addr;
10422 if (GET_CODE (base) == PLUS)
10423 base = XEXP (base, 0);
10424 gcc_checking_assert (REG_P (base));
10426 /* When saving registers into a re-aligned local stack frame, avoid
10427 any tricky guessing by dwarf2out. */
10428 if (m->fs.realigned)
10430 gcc_checking_assert (stack_realign_drap);
10432 if (regno == REGNO (crtl->drap_reg))
10434 /* A bit of a hack. We force the DRAP register to be saved in
10435 the re-aligned stack frame, which provides us with a copy
10436 of the CFA that will last past the prologue. Install it. */
10437 gcc_checking_assert (cfun->machine->fs.fp_valid);
10438 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10439 cfun->machine->fs.fp_offset - cfa_offset);
10440 mem = gen_rtx_MEM (mode, addr);
10441 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10443 else
10445 /* The frame pointer is a stable reference within the
10446 aligned frame. Use it. */
10447 gcc_checking_assert (cfun->machine->fs.fp_valid);
10448 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10449 cfun->machine->fs.fp_offset - cfa_offset);
10450 mem = gen_rtx_MEM (mode, addr);
10451 add_reg_note (insn, REG_CFA_EXPRESSION,
10452 gen_rtx_SET (VOIDmode, mem, reg));
10456 /* The memory may not be relative to the current CFA register,
10457 which means that we may need to generate a new pattern for
10458 use by the unwind info. */
10459 else if (base != m->fs.cfa_reg)
10461 addr = plus_constant (Pmode, m->fs.cfa_reg,
10462 m->fs.cfa_offset - cfa_offset);
10463 mem = gen_rtx_MEM (mode, addr);
10464 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10468 /* Emit code to save registers using MOV insns.
10469 First register is stored at CFA - CFA_OFFSET. */
10470 static void
10471 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10473 unsigned int regno;
10475 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10476 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10478 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10479 cfa_offset -= UNITS_PER_WORD;
10483 /* Emit code to save SSE registers using MOV insns.
10484 First register is stored at CFA - CFA_OFFSET. */
10485 static void
10486 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10488 unsigned int regno;
10490 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10491 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10493 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10494 cfa_offset -= 16;
10498 static GTY(()) rtx queued_cfa_restores;
10500 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
10501 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10502 Don't add the note if the previously saved value will be left untouched
10503 within the stack red zone until return, as unwinders can find the same value
10504 in the register and on the stack. */
10506 static void
10507 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
10509 if (!crtl->shrink_wrapped
10510 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10511 return;
10513 if (insn)
10515 add_reg_note (insn, REG_CFA_RESTORE, reg);
10516 RTX_FRAME_RELATED_P (insn) = 1;
10518 else
10519 queued_cfa_restores
10520 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10523 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10525 static void
10526 ix86_add_queued_cfa_restore_notes (rtx insn)
10528 rtx last;
10529 if (!queued_cfa_restores)
10530 return;
10531 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10533 XEXP (last, 1) = REG_NOTES (insn);
10534 REG_NOTES (insn) = queued_cfa_restores;
10535 queued_cfa_restores = NULL_RTX;
10536 RTX_FRAME_RELATED_P (insn) = 1;
10539 /* Expand prologue or epilogue stack adjustment.
10540 The pattern exists to put a dependency on all ebp-based memory accesses.
10541 STYLE should be negative if instructions should be marked as frame related,
10542 zero if %r11 register is live and cannot be freely used and positive
10543 otherwise. */
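/* For example (as used in ix86_expand_prologue below), allocating the
   register save area before realigning the stack looks like:

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				GEN_INT (m->fs.sp_offset
					 - frame.sse_reg_save_offset),
				-1, false);

   i.e. adjust SP by a constant, mark the insns frame related (STYLE < 0),
   and do not move the CFA (SET_CFA is false).  */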
10545 static void
10546 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10547 int style, bool set_cfa)
10549 struct machine_function *m = cfun->machine;
10550 rtx insn;
10551 bool add_frame_related_expr = false;
10553 if (Pmode == SImode)
10554 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10555 else if (x86_64_immediate_operand (offset, DImode))
10556 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10557 else
10559 rtx tmp;
10560 /* r11 is used by indirect sibcall return as well; it is set before the
10561 epilogue and used after the epilogue. */
10562 if (style)
10563 tmp = gen_rtx_REG (DImode, R11_REG);
10564 else
10566 gcc_assert (src != hard_frame_pointer_rtx
10567 && dest != hard_frame_pointer_rtx);
10568 tmp = hard_frame_pointer_rtx;
10570 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10571 if (style < 0)
10572 add_frame_related_expr = true;
10574 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10577 insn = emit_insn (insn);
10578 if (style >= 0)
10579 ix86_add_queued_cfa_restore_notes (insn);
10581 if (set_cfa)
10583 rtx r;
10585 gcc_assert (m->fs.cfa_reg == src);
10586 m->fs.cfa_offset += INTVAL (offset);
10587 m->fs.cfa_reg = dest;
10589 r = gen_rtx_PLUS (Pmode, src, offset);
10590 r = gen_rtx_SET (VOIDmode, dest, r);
10591 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10592 RTX_FRAME_RELATED_P (insn) = 1;
10594 else if (style < 0)
10596 RTX_FRAME_RELATED_P (insn) = 1;
10597 if (add_frame_related_expr)
10599 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10600 r = gen_rtx_SET (VOIDmode, dest, r);
10601 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10605 if (dest == stack_pointer_rtx)
10607 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10608 bool valid = m->fs.sp_valid;
10610 if (src == hard_frame_pointer_rtx)
10612 valid = m->fs.fp_valid;
10613 ooffset = m->fs.fp_offset;
10615 else if (src == crtl->drap_reg)
10617 valid = m->fs.drap_valid;
10618 ooffset = 0;
10620 else
10622 /* Else there are two possibilities: SP itself, which we set
10623 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
10624 taken care of by hand along the eh_return path. */
10625 gcc_checking_assert (src == stack_pointer_rtx
10626 || offset == const0_rtx);
10629 m->fs.sp_offset = ooffset - INTVAL (offset);
10630 m->fs.sp_valid = valid;
10634 /* Find an available register to be used as dynamic realign argument
10635 pointer register. Such a register will be written in the prologue and
10636 used at the beginning of the body, so it must not be
10637 1. parameter passing register.
10638 2. GOT pointer.
10639 We reuse static-chain register if it is available. Otherwise, we
10640 use DI for i386 and R13 for x86-64. We chose R13 since it has
10641 shorter encoding.
10643 Return: the regno of chosen register. */
10645 static unsigned int
10646 find_drap_reg (void)
10648 tree decl = cfun->decl;
10650 if (TARGET_64BIT)
10652 /* Use R13 for a nested function or a function that needs a static chain.
10653 Since a function with a tail call may use any caller-saved
10654 registers in the epilogue, DRAP must not use a caller-saved
10655 register in such a case. */
10656 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10657 return R13_REG;
10659 return R10_REG;
10661 else
10663 /* Use DI for a nested function or a function that needs a static chain.
10664 Since a function with a tail call may use any caller-saved
10665 registers in the epilogue, DRAP must not use a caller-saved
10666 register in such a case. */
10667 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10668 return DI_REG;
10670 /* Reuse static chain register if it isn't used for parameter
10671 passing. */
10672 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10674 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10675 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10676 return CX_REG;
10678 return DI_REG;
10682 /* Return minimum incoming stack alignment. */
10684 static unsigned int
10685 ix86_minimum_incoming_stack_boundary (bool sibcall)
10687 unsigned int incoming_stack_boundary;
10689 /* Prefer the one specified at command line. */
10690 if (ix86_user_incoming_stack_boundary)
10691 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10692 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10693 when -mstackrealign is used, this isn't a sibcall check, and the
10694 estimated stack alignment is 128 bits. */
10695 else if (!sibcall
10696 && !TARGET_64BIT
10697 && ix86_force_align_arg_pointer
10698 && crtl->stack_alignment_estimated == 128)
10699 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10700 else
10701 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10703 /* Incoming stack alignment can be changed on individual functions
10704 via force_align_arg_pointer attribute. We use the smallest
10705 incoming stack boundary. */
10706 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10707 && lookup_attribute (ix86_force_align_arg_pointer_string,
10708 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10709 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10711 /* The incoming stack frame has to be aligned at least at
10712 parm_stack_boundary. */
10713 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10714 incoming_stack_boundary = crtl->parm_stack_boundary;
10716 /* The stack at the entry of main is aligned by the runtime. We use the
10717 smallest incoming stack boundary. */
10718 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10719 && DECL_NAME (current_function_decl)
10720 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10721 && DECL_FILE_SCOPE_P (current_function_decl))
10722 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10724 return incoming_stack_boundary;
10727 /* Update incoming stack boundary and estimated stack alignment. */
10729 static void
10730 ix86_update_stack_boundary (void)
10732 ix86_incoming_stack_boundary
10733 = ix86_minimum_incoming_stack_boundary (false);
10735 /* The x86_64 varargs register save area needs 16-byte stack
10736 alignment. */
10737 if (TARGET_64BIT
10738 && cfun->stdarg
10739 && crtl->stack_alignment_estimated < 128)
10740 crtl->stack_alignment_estimated = 128;
10743 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10744 needed or an rtx for DRAP otherwise. */
10746 static rtx
10747 ix86_get_drap_rtx (void)
10749 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10750 crtl->need_drap = true;
10752 if (stack_realign_drap)
10754 /* Assign DRAP to vDRAP and return vDRAP. */
10755 unsigned int regno = find_drap_reg ();
10756 rtx drap_vreg;
10757 rtx arg_ptr;
10758 rtx_insn *seq, *insn;
10760 arg_ptr = gen_rtx_REG (Pmode, regno);
10761 crtl->drap_reg = arg_ptr;
10763 start_sequence ();
10764 drap_vreg = copy_to_reg (arg_ptr);
10765 seq = get_insns ();
10766 end_sequence ();
10768 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10769 if (!optimize)
10771 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10772 RTX_FRAME_RELATED_P (insn) = 1;
10774 return drap_vreg;
10776 else
10777 return NULL;
10780 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10782 static rtx
10783 ix86_internal_arg_pointer (void)
10785 return virtual_incoming_args_rtx;
10788 struct scratch_reg {
10789 rtx reg;
10790 bool saved;
10793 /* Return a short-lived scratch register for use on function entry.
10794 In 32-bit mode, it is valid only after the registers are saved
10795 in the prologue. This register must be released by means of
10796 release_scratch_register_on_entry once it is dead. */
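/* The expected usage pattern, as in ix86_adjust_stack_and_probe and
   ix86_emit_probe_stack_range below, is:

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... use sr.reg ...
     release_scratch_register_on_entry (&sr);  */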
10798 static void
10799 get_scratch_register_on_entry (struct scratch_reg *sr)
10801 int regno;
10803 sr->saved = false;
10805 if (TARGET_64BIT)
10807 /* We always use R11 in 64-bit mode. */
10808 regno = R11_REG;
10810 else
10812 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10813 bool fastcall_p
10814 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10815 bool thiscall_p
10816 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10817 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10818 int regparm = ix86_function_regparm (fntype, decl);
10819 int drap_regno
10820 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10822 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10823 for the static chain register. */
10824 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10825 && drap_regno != AX_REG)
10826 regno = AX_REG;
10827 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10828 for the static chain register. */
10829 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10830 regno = AX_REG;
10831 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10832 regno = DX_REG;
10833 /* ecx is the static chain register. */
10834 else if (regparm < 3 && !fastcall_p && !thiscall_p
10835 && !static_chain_p
10836 && drap_regno != CX_REG)
10837 regno = CX_REG;
10838 else if (ix86_save_reg (BX_REG, true))
10839 regno = BX_REG;
10840 /* esi is the static chain register. */
10841 else if (!(regparm == 3 && static_chain_p)
10842 && ix86_save_reg (SI_REG, true))
10843 regno = SI_REG;
10844 else if (ix86_save_reg (DI_REG, true))
10845 regno = DI_REG;
10846 else
10848 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10849 sr->saved = true;
10853 sr->reg = gen_rtx_REG (Pmode, regno);
10854 if (sr->saved)
10856 rtx_insn *insn = emit_insn (gen_push (sr->reg));
10857 RTX_FRAME_RELATED_P (insn) = 1;
10861 /* Release a scratch register obtained from the preceding function. */
10863 static void
10864 release_scratch_register_on_entry (struct scratch_reg *sr)
10866 if (sr->saved)
10868 struct machine_function *m = cfun->machine;
10869 rtx x, insn = emit_insn (gen_pop (sr->reg));
10871 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10872 RTX_FRAME_RELATED_P (insn) = 1;
10873 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10874 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10875 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10876 m->fs.sp_offset -= UNITS_PER_WORD;
10880 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
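/* With the usual STACK_CHECK_PROBE_INTERVAL_EXP of 12 this evaluates to
   4096 bytes, i.e. one probe per 4 KiB page; the exact value depends on
   the target's definition of the exponent.  */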
10882 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10884 static void
10885 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10887 /* We skip the probe for the first interval + a small dope of 4 words and
10888 probe that many bytes past the specified size to maintain a protection
10889 area at the bottom of the stack. */
10890 const int dope = 4 * UNITS_PER_WORD;
10891 rtx size_rtx = GEN_INT (size), last;
10893 /* See if we have a constant small number of probes to generate. If so,
10894 that's the easy case. The run-time loop is made up of 11 insns in the
10895 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10896 for n # of intervals. */
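/* E.g. assuming a 4 KiB probe interval, allocations of up to 20 KiB take
   this unrolled path; larger allocations fall through to the loop below.  */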
10897 if (size <= 5 * PROBE_INTERVAL)
10899 HOST_WIDE_INT i, adjust;
10900 bool first_probe = true;
10902 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10903 values of N from 1 until it exceeds SIZE. If only one probe is
10904 needed, this will not generate any code. Then adjust and probe
10905 to PROBE_INTERVAL + SIZE. */
10906 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10908 if (first_probe)
10910 adjust = 2 * PROBE_INTERVAL + dope;
10911 first_probe = false;
10913 else
10914 adjust = PROBE_INTERVAL;
10916 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10917 plus_constant (Pmode, stack_pointer_rtx,
10918 -adjust)));
10919 emit_stack_probe (stack_pointer_rtx);
10922 if (first_probe)
10923 adjust = size + PROBE_INTERVAL + dope;
10924 else
10925 adjust = size + PROBE_INTERVAL - i;
10927 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10928 plus_constant (Pmode, stack_pointer_rtx,
10929 -adjust)));
10930 emit_stack_probe (stack_pointer_rtx);
10932 /* Adjust back to account for the additional first interval. */
10933 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10934 plus_constant (Pmode, stack_pointer_rtx,
10935 PROBE_INTERVAL + dope)));
10938 /* Otherwise, do the same as above, but in a loop. Note that we must be
10939 extra careful with variables wrapping around because we might be at
10940 the very top (or the very bottom) of the address space and we have
10941 to be able to handle this case properly; in particular, we use an
10942 equality test for the loop condition. */
10943 else
10945 HOST_WIDE_INT rounded_size;
10946 struct scratch_reg sr;
10948 get_scratch_register_on_entry (&sr);
10951 /* Step 1: round SIZE to the previous multiple of the interval. */
10953 rounded_size = size & -PROBE_INTERVAL;
10956 /* Step 2: compute initial and final value of the loop counter. */
10958 /* SP = SP_0 + PROBE_INTERVAL. */
10959 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10960 plus_constant (Pmode, stack_pointer_rtx,
10961 - (PROBE_INTERVAL + dope))));
10963 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10964 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10965 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10966 gen_rtx_PLUS (Pmode, sr.reg,
10967 stack_pointer_rtx)));
10970 /* Step 3: the loop
10972 while (SP != LAST_ADDR)
10974 SP = SP + PROBE_INTERVAL
10975 probe at SP
10978 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10979 values of N from 1 until it is equal to ROUNDED_SIZE. */
10981 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10984 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10985 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10987 if (size != rounded_size)
10989 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10990 plus_constant (Pmode, stack_pointer_rtx,
10991 rounded_size - size)));
10992 emit_stack_probe (stack_pointer_rtx);
10995 /* Adjust back to account for the additional first interval. */
10996 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10997 plus_constant (Pmode, stack_pointer_rtx,
10998 PROBE_INTERVAL + dope)));
11000 release_scratch_register_on_entry (&sr);
11003 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
11005 /* Even if the stack pointer isn't the CFA register, we need to correctly
11006 describe the adjustments made to it, in particular differentiate the
11007 frame-related ones from the frame-unrelated ones. */
11008 if (size > 0)
11010 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
11011 XVECEXP (expr, 0, 0)
11012 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11013 plus_constant (Pmode, stack_pointer_rtx, -size));
11014 XVECEXP (expr, 0, 1)
11015 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11016 plus_constant (Pmode, stack_pointer_rtx,
11017 PROBE_INTERVAL + dope + size));
11018 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
11019 RTX_FRAME_RELATED_P (last) = 1;
11021 cfun->machine->fs.sp_offset += size;
11024 /* Make sure nothing is scheduled before we are done. */
11025 emit_insn (gen_blockage ());
11028 /* Adjust the stack pointer up to REG while probing it. */
11030 const char *
11031 output_adjust_stack_and_probe (rtx reg)
11033 static int labelno = 0;
11034 char loop_lab[32], end_lab[32];
11035 rtx xops[2];
11037 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11038 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11040 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11042 /* Jump to END_LAB if SP == LAST_ADDR. */
11043 xops[0] = stack_pointer_rtx;
11044 xops[1] = reg;
11045 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11046 fputs ("\tje\t", asm_out_file);
11047 assemble_name_raw (asm_out_file, end_lab);
11048 fputc ('\n', asm_out_file);
11050 /* SP = SP + PROBE_INTERVAL. */
11051 xops[1] = GEN_INT (PROBE_INTERVAL);
11052 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11054 /* Probe at SP. */
11055 xops[1] = const0_rtx;
11056 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
11058 fprintf (asm_out_file, "\tjmp\t");
11059 assemble_name_raw (asm_out_file, loop_lab);
11060 fputc ('\n', asm_out_file);
11062 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11064 return "";
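/* The emitted sequence is, schematically (AT&T syntax, 64-bit, assuming the
   scratch register is %r11 and a 4 KiB probe interval):

	.LPSRL0:	cmpq	%r11, %rsp
			je	.LPSRE0
			subq	$4096, %rsp
			orq	$0, (%rsp)
			jmp	.LPSRL0
	.LPSRE0:  */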
11067 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
11068 inclusive. These are offsets from the current stack pointer. */
11070 static void
11071 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
11073 /* See if we have a constant small number of probes to generate. If so,
11074 that's the easy case. The run-time loop is made up of 7 insns in the
11075 generic case while the compile-time loop is made up of n insns for n #
11076 of intervals. */
11077 if (size <= 7 * PROBE_INTERVAL)
11079 HOST_WIDE_INT i;
11081 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
11082 it exceeds SIZE. If only one probe is needed, this will not
11083 generate any code. Then probe at FIRST + SIZE. */
11084 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11085 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11086 -(first + i)));
11088 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11089 -(first + size)));
11092 /* Otherwise, do the same as above, but in a loop. Note that we must be
11093 extra careful with variables wrapping around because we might be at
11094 the very top (or the very bottom) of the address space and we have
11095 to be able to handle this case properly; in particular, we use an
11096 equality test for the loop condition. */
11097 else
11099 HOST_WIDE_INT rounded_size, last;
11100 struct scratch_reg sr;
11102 get_scratch_register_on_entry (&sr);
11105 /* Step 1: round SIZE to the previous multiple of the interval. */
11107 rounded_size = size & -PROBE_INTERVAL;
11110 /* Step 2: compute initial and final value of the loop counter. */
11112 /* TEST_OFFSET = FIRST. */
11113 emit_move_insn (sr.reg, GEN_INT (-first));
11115 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
11116 last = first + rounded_size;
11119 /* Step 3: the loop
11121 while (TEST_ADDR != LAST_ADDR)
11123 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
11124 probe at TEST_ADDR
11127 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
11128 until it is equal to ROUNDED_SIZE. */
11130 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
11133 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
11134 that SIZE is equal to ROUNDED_SIZE. */
11136 if (size != rounded_size)
11137 emit_stack_probe (plus_constant (Pmode,
11138 gen_rtx_PLUS (Pmode,
11139 stack_pointer_rtx,
11140 sr.reg),
11141 rounded_size - size));
11143 release_scratch_register_on_entry (&sr);
11146 /* Make sure nothing is scheduled before we are done. */
11147 emit_insn (gen_blockage ());
11150 /* Probe a range of stack addresses from REG to END, inclusive. These are
11151 offsets from the current stack pointer. */
11153 const char *
11154 output_probe_stack_range (rtx reg, rtx end)
11156 static int labelno = 0;
11157 char loop_lab[32], end_lab[32];
11158 rtx xops[3];
11160 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11161 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11163 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11165 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11166 xops[0] = reg;
11167 xops[1] = end;
11168 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11169 fputs ("\tje\t", asm_out_file);
11170 assemble_name_raw (asm_out_file, end_lab);
11171 fputc ('\n', asm_out_file);
11173 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11174 xops[1] = GEN_INT (PROBE_INTERVAL);
11175 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11177 /* Probe at TEST_ADDR. */
11178 xops[0] = stack_pointer_rtx;
11179 xops[1] = reg;
11180 xops[2] = const0_rtx;
11181 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11183 fprintf (asm_out_file, "\tjmp\t");
11184 assemble_name_raw (asm_out_file, loop_lab);
11185 fputc ('\n', asm_out_file);
11187 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11189 return "";
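/* Note that unlike output_adjust_stack_and_probe above, this loop leaves the
   stack pointer unchanged and steps the scratch offset register instead, so
   each probe touches (%esp/%rsp + scratch) while SP itself stays put.  */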
11192 /* Finalize the stack_realign_needed flag, which guides how the prologue
11193 and epilogue are generated. */
11194 static void
11195 ix86_finalize_stack_realign_flags (void)
11197 /* Check if stack realignment is really needed after reload, and
11198 store the result in cfun. */
11199 unsigned int incoming_stack_boundary
11200 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11201 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11202 unsigned int stack_realign = (incoming_stack_boundary
11203 < (crtl->is_leaf
11204 ? crtl->max_used_stack_slot_alignment
11205 : crtl->stack_alignment_needed));
11207 if (crtl->stack_realign_finalized)
11209 /* After stack_realign_needed is finalized, we can no longer
11210 change it. */
11211 gcc_assert (crtl->stack_realign_needed == stack_realign);
11212 return;
11215 /* If the only reason for frame_pointer_needed is that we conservatively
11216 assumed stack realignment might be needed, but in the end nothing that
11217 needed the stack alignment had been spilled, clear frame_pointer_needed
11218 and say we don't need stack realignment. */
11219 if (stack_realign
11220 && frame_pointer_needed
11221 && crtl->is_leaf
11222 && flag_omit_frame_pointer
11223 && crtl->sp_is_unchanging
11224 && !ix86_current_function_calls_tls_descriptor
11225 && !crtl->accesses_prior_frames
11226 && !cfun->calls_alloca
11227 && !crtl->calls_eh_return
11228 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11229 && !ix86_frame_pointer_required ()
11230 && get_frame_size () == 0
11231 && ix86_nsaved_sseregs () == 0
11232 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11234 HARD_REG_SET set_up_by_prologue, prologue_used;
11235 basic_block bb;
11237 CLEAR_HARD_REG_SET (prologue_used);
11238 CLEAR_HARD_REG_SET (set_up_by_prologue);
11239 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11240 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11241 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11242 HARD_FRAME_POINTER_REGNUM);
11243 FOR_EACH_BB_FN (bb, cfun)
11245 rtx_insn *insn;
11246 FOR_BB_INSNS (bb, insn)
11247 if (NONDEBUG_INSN_P (insn)
11248 && requires_stack_frame_p (insn, prologue_used,
11249 set_up_by_prologue))
11251 crtl->stack_realign_needed = stack_realign;
11252 crtl->stack_realign_finalized = true;
11253 return;
11257 /* If drap has been set, but it actually isn't live at the start
11258 of the function, there is no reason to set it up. */
11259 if (crtl->drap_reg)
11261 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11262 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11264 crtl->drap_reg = NULL_RTX;
11265 crtl->need_drap = false;
11268 else
11269 cfun->machine->no_drap_save_restore = true;
11271 frame_pointer_needed = false;
11272 stack_realign = false;
11273 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11274 crtl->stack_alignment_needed = incoming_stack_boundary;
11275 crtl->stack_alignment_estimated = incoming_stack_boundary;
11276 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11277 crtl->preferred_stack_boundary = incoming_stack_boundary;
11278 df_finish_pass (true);
11279 df_scan_alloc (NULL);
11280 df_scan_blocks ();
11281 df_compute_regs_ever_live (true);
11282 df_analyze ();
11285 crtl->stack_realign_needed = stack_realign;
11286 crtl->stack_realign_finalized = true;
11289 /* Delete SET_GOT right after the entry block if it is allocated to a register. */
11291 static void
11292 ix86_elim_entry_set_got (rtx reg)
11294 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11295 rtx_insn *c_insn = BB_HEAD (bb);
11296 if (!NONDEBUG_INSN_P (c_insn))
11297 c_insn = next_nonnote_nondebug_insn (c_insn);
11298 if (c_insn && NONJUMP_INSN_P (c_insn))
11300 rtx pat = PATTERN (c_insn);
11301 if (GET_CODE (pat) == PARALLEL)
11303 rtx vec = XVECEXP (pat, 0, 0);
11304 if (GET_CODE (vec) == SET
11305 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11306 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11307 delete_insn (c_insn);
11312 /* Expand the prologue into a bunch of separate insns. */
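/* Roughly, in the order implemented below: finalize the stack realignment
   decision; handle the ms_hook and static-chain-on-stack entry sequences;
   set up DRAP and realign the stack if stack_realign_drap; push the frame
   pointer and integer registers (or save them with moves); realign for
   stack_realign_fp; allocate the frame, probing it if stack checking is
   enabled; save any remaining integer and SSE registers with moves; and
   finally emit the SET_GOT, cld and scheduling blockages the function
   needs.  */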
11314 void
11315 ix86_expand_prologue (void)
11317 struct machine_function *m = cfun->machine;
11318 rtx insn, t;
11319 struct ix86_frame frame;
11320 HOST_WIDE_INT allocate;
11321 bool int_registers_saved;
11322 bool sse_registers_saved;
11324 ix86_finalize_stack_realign_flags ();
11326 /* DRAP should not coexist with stack_realign_fp */
11327 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11329 memset (&m->fs, 0, sizeof (m->fs));
11331 /* Initialize CFA state for before the prologue. */
11332 m->fs.cfa_reg = stack_pointer_rtx;
11333 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11335 /* Track SP offset to the CFA. We continue tracking this after we've
11336 swapped the CFA register away from SP. In the case of re-alignment
11337 this is fudged; we're interested in offsets within the local frame. */
11338 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11339 m->fs.sp_valid = true;
11341 ix86_compute_frame_layout (&frame);
11343 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11345 /* We should have already generated an error for any use of
11346 ms_hook on a nested function. */
11347 gcc_checking_assert (!ix86_static_chain_on_stack);
11349 /* Check if profiling is active and we shall use the profiling-before-
11350 prologue variant. If so, sorry. */
11351 if (crtl->profile && flag_fentry != 0)
11352 sorry ("ms_hook_prologue attribute isn%'t compatible "
11353 "with -mfentry for 32-bit");
11355 /* In ix86_asm_output_function_label we emitted:
11356 8b ff movl.s %edi,%edi
11357 55 push %ebp
11358 8b ec movl.s %esp,%ebp
11360 This matches the hookable function prologue in Win32 API
11361 functions in Microsoft Windows XP Service Pack 2 and newer.
11362 Wine uses this to enable Windows apps to hook the Win32 API
11363 functions provided by Wine.
11365 What that means is that we've already set up the frame pointer. */
11367 if (frame_pointer_needed
11368 && !(crtl->drap_reg && crtl->stack_realign_needed))
11370 rtx push, mov;
11372 /* We've decided to use the frame pointer already set up.
11373 Describe this to the unwinder by pretending that both
11374 push and mov insns happen right here.
11376 Putting the unwind info here at the end of the ms_hook
11377 is done so that we can make absolutely certain we get
11378 the required byte sequence at the start of the function,
11379 rather than relying on an assembler that can produce
11380 the exact encoding required.
11382 However it does mean (in the unpatched case) that we have
11383 a 1 insn window where the asynchronous unwind info is
11384 incorrect. However, if we placed the unwind info at
11385 its correct location we would have incorrect unwind info
11386 in the patched case. Which is probably all moot since
11387 I don't expect Wine generates dwarf2 unwind info for the
11388 system libraries that use this feature. */
11390 insn = emit_insn (gen_blockage ());
11392 push = gen_push (hard_frame_pointer_rtx);
11393 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
11394 stack_pointer_rtx);
11395 RTX_FRAME_RELATED_P (push) = 1;
11396 RTX_FRAME_RELATED_P (mov) = 1;
11398 RTX_FRAME_RELATED_P (insn) = 1;
11399 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11400 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11402 /* Note that gen_push incremented m->fs.cfa_offset, even
11403 though we didn't emit the push insn here. */
11404 m->fs.cfa_reg = hard_frame_pointer_rtx;
11405 m->fs.fp_offset = m->fs.cfa_offset;
11406 m->fs.fp_valid = true;
11408 else
11410 /* The frame pointer is not needed so pop %ebp again.
11411 This leaves us with a pristine state. */
11412 emit_insn (gen_pop (hard_frame_pointer_rtx));
11416 /* The first insn of a function that accepts its static chain on the
11417 stack is to push the register that would be filled in by a direct
11418 call. This insn will be skipped by the trampoline. */
11419 else if (ix86_static_chain_on_stack)
11421 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11422 emit_insn (gen_blockage ());
11424 /* We don't want to interpret this push insn as a register save,
11425 only as a stack adjustment. The real copy of the register as
11426 a save will be done later, if needed. */
11427 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11428 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
11429 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11430 RTX_FRAME_RELATED_P (insn) = 1;
11433 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
11434 DRAP is needed and stack realignment is really needed after reload. */
11435 if (stack_realign_drap)
11437 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11439 /* Only need to push parameter pointer reg if it is caller saved. */
11440 if (!call_used_regs[REGNO (crtl->drap_reg)])
11442 /* Push arg pointer reg */
11443 insn = emit_insn (gen_push (crtl->drap_reg));
11444 RTX_FRAME_RELATED_P (insn) = 1;
11447 /* Grab the argument pointer. */
11448 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11449 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11450 RTX_FRAME_RELATED_P (insn) = 1;
11451 m->fs.cfa_reg = crtl->drap_reg;
11452 m->fs.cfa_offset = 0;
11454 /* Align the stack. */
11455 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11456 stack_pointer_rtx,
11457 GEN_INT (-align_bytes)));
11458 RTX_FRAME_RELATED_P (insn) = 1;
11460 /* Replicate the return address on the stack so that the return
11461 address can be reached via the (argp - 1) slot. This is needed
11462 to implement macro RETURN_ADDR_RTX and intrinsic function
11463 expand_builtin_return_addr etc. */
11464 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11465 t = gen_frame_mem (word_mode, t);
11466 insn = emit_insn (gen_push (t));
11467 RTX_FRAME_RELATED_P (insn) = 1;
11469 /* For the purposes of frame and register save area addressing,
11470 we've started over with a new frame. */
11471 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11472 m->fs.realigned = true;
11475 int_registers_saved = (frame.nregs == 0);
11476 sse_registers_saved = (frame.nsseregs == 0);
11478 if (frame_pointer_needed && !m->fs.fp_valid)
11480 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11481 slower on all targets. Also sdb doesn't like it. */
11482 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11483 RTX_FRAME_RELATED_P (insn) = 1;
11485 /* Push registers now, before setting the frame pointer
11486 on SEH target. */
11487 if (!int_registers_saved
11488 && TARGET_SEH
11489 && !frame.save_regs_using_mov)
11491 ix86_emit_save_regs ();
11492 int_registers_saved = true;
11493 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11496 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11498 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11499 RTX_FRAME_RELATED_P (insn) = 1;
11501 if (m->fs.cfa_reg == stack_pointer_rtx)
11502 m->fs.cfa_reg = hard_frame_pointer_rtx;
11503 m->fs.fp_offset = m->fs.sp_offset;
11504 m->fs.fp_valid = true;
11508 if (!int_registers_saved)
11510 /* If saving registers via PUSH, do so now. */
11511 if (!frame.save_regs_using_mov)
11513 ix86_emit_save_regs ();
11514 int_registers_saved = true;
11515 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11518 /* When using the red zone we may start register saving before allocating
11519 the stack frame, saving one cycle of the prologue. However, avoid
11520 doing this if we have to probe the stack; at least on x86_64 the
11521 stack probe can turn into a call that clobbers a red zone location. */
11522 else if (ix86_using_red_zone ()
11523 && (! TARGET_STACK_PROBE
11524 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11526 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11527 int_registers_saved = true;
11531 if (stack_realign_fp)
11533 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11534 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11536 /* The computation of the size of the re-aligned stack frame means
11537 that we must allocate the size of the register save area before
11538 performing the actual alignment. Otherwise we cannot guarantee
11539 that there's enough storage above the realignment point. */
11540 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11541 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11542 GEN_INT (m->fs.sp_offset
11543 - frame.sse_reg_save_offset),
11544 -1, false);
11546 /* Align the stack. */
11547 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11548 stack_pointer_rtx,
11549 GEN_INT (-align_bytes)));
11551 /* For the purposes of register save area addressing, the stack
11552 pointer is no longer valid. As for the value of sp_offset,
11553 see ix86_compute_frame_layout, which we need to match in order
11554 to pass verification of stack_pointer_offset at the end. */
11555 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11556 m->fs.sp_valid = false;
11559 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11561 if (flag_stack_usage_info)
11563 /* We start to count from ARG_POINTER. */
11564 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11566 /* If it was realigned, take into account the fake frame. */
11567 if (stack_realign_drap)
11569 if (ix86_static_chain_on_stack)
11570 stack_size += UNITS_PER_WORD;
11572 if (!call_used_regs[REGNO (crtl->drap_reg)])
11573 stack_size += UNITS_PER_WORD;
11575 /* This over-estimates by 1 minimal-stack-alignment-unit but
11576 mitigates that by counting in the new return address slot. */
11577 current_function_dynamic_stack_size
11578 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11581 current_function_static_stack_size = stack_size;
11584 /* On SEH target with very large frame size, allocate an area to save
11585 SSE registers (as the very large allocation won't be described). */
11586 if (TARGET_SEH
11587 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11588 && !sse_registers_saved)
11590 HOST_WIDE_INT sse_size =
11591 frame.sse_reg_save_offset - frame.reg_save_offset;
11593 gcc_assert (int_registers_saved);
11595 /* No need to do stack checking as the area will be immediately
11596 written. */
11597 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11598 GEN_INT (-sse_size), -1,
11599 m->fs.cfa_reg == stack_pointer_rtx);
11600 allocate -= sse_size;
11601 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11602 sse_registers_saved = true;
11605 /* The stack has already been decremented by the instruction calling us
11606 so probe if the size is non-negative to preserve the protection area. */
11607 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11609 /* We expect the registers to be saved when probes are used. */
11610 gcc_assert (int_registers_saved);
11612 if (STACK_CHECK_MOVING_SP)
11614 if (!(crtl->is_leaf && !cfun->calls_alloca
11615 && allocate <= PROBE_INTERVAL))
11617 ix86_adjust_stack_and_probe (allocate);
11618 allocate = 0;
11621 else
11623 HOST_WIDE_INT size = allocate;
11625 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11626 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11628 if (TARGET_STACK_PROBE)
11630 if (crtl->is_leaf && !cfun->calls_alloca)
11632 if (size > PROBE_INTERVAL)
11633 ix86_emit_probe_stack_range (0, size);
11635 else
11636 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11638 else
11640 if (crtl->is_leaf && !cfun->calls_alloca)
11642 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11643 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11644 size - STACK_CHECK_PROTECT);
11646 else
11647 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11652 if (allocate == 0)
11654 else if (!ix86_target_stack_probe ()
11655 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11657 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11658 GEN_INT (-allocate), -1,
11659 m->fs.cfa_reg == stack_pointer_rtx);
11661 else
11663 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11664 rtx r10 = NULL;
11665 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11666 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11667 bool eax_live = ix86_eax_live_at_start_p ();
11668 bool r10_live = false;
11670 if (TARGET_64BIT)
11671 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11673 if (eax_live)
11675 insn = emit_insn (gen_push (eax));
11676 allocate -= UNITS_PER_WORD;
11677 /* Note that SEH directives need to continue tracking the stack
11678 pointer even after the frame pointer has been set up. */
11679 if (sp_is_cfa_reg || TARGET_SEH)
11681 if (sp_is_cfa_reg)
11682 m->fs.cfa_offset += UNITS_PER_WORD;
11683 RTX_FRAME_RELATED_P (insn) = 1;
11684 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11685 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11686 plus_constant (Pmode, stack_pointer_rtx,
11687 -UNITS_PER_WORD)));
11691 if (r10_live)
11693 r10 = gen_rtx_REG (Pmode, R10_REG);
11694 insn = emit_insn (gen_push (r10));
11695 allocate -= UNITS_PER_WORD;
11696 if (sp_is_cfa_reg || TARGET_SEH)
11698 if (sp_is_cfa_reg)
11699 m->fs.cfa_offset += UNITS_PER_WORD;
11700 RTX_FRAME_RELATED_P (insn) = 1;
11701 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11702 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11703 plus_constant (Pmode, stack_pointer_rtx,
11704 -UNITS_PER_WORD)));
11708 emit_move_insn (eax, GEN_INT (allocate));
11709 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11711 /* Use the fact that AX still contains ALLOCATE. */
11712 adjust_stack_insn = (Pmode == DImode
11713 ? gen_pro_epilogue_adjust_stack_di_sub
11714 : gen_pro_epilogue_adjust_stack_si_sub);
11716 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11717 stack_pointer_rtx, eax));
11719 if (sp_is_cfa_reg || TARGET_SEH)
11721 if (sp_is_cfa_reg)
11722 m->fs.cfa_offset += allocate;
11723 RTX_FRAME_RELATED_P (insn) = 1;
11724 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11725 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11726 plus_constant (Pmode, stack_pointer_rtx,
11727 -allocate)));
11729 m->fs.sp_offset += allocate;
11731 /* Use stack_pointer_rtx for relative addressing so that code
11732 works for realigned stack, too. */
11733 if (r10_live && eax_live)
11735 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11736 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11737 gen_frame_mem (word_mode, t));
11738 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11739 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11740 gen_frame_mem (word_mode, t));
11742 else if (eax_live || r10_live)
11744 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11745 emit_move_insn (gen_rtx_REG (word_mode,
11746 (eax_live ? AX_REG : R10_REG)),
11747 gen_frame_mem (word_mode, t));
11750 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11752 /* If we haven't already set up the frame pointer, do so now. */
11753 if (frame_pointer_needed && !m->fs.fp_valid)
11755 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11756 GEN_INT (frame.stack_pointer_offset
11757 - frame.hard_frame_pointer_offset));
11758 insn = emit_insn (insn);
11759 RTX_FRAME_RELATED_P (insn) = 1;
11760 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11762 if (m->fs.cfa_reg == stack_pointer_rtx)
11763 m->fs.cfa_reg = hard_frame_pointer_rtx;
11764 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11765 m->fs.fp_valid = true;
11768 if (!int_registers_saved)
11769 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11770 if (!sse_registers_saved)
11771 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11773 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
11774 in the prologue. */
11775 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11777 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11778 insn = emit_insn (gen_set_got (pic));
11779 RTX_FRAME_RELATED_P (insn) = 1;
11780 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11781 emit_insn (gen_prologue_use (pic));
11782 /* Delete the already emitted SET_GOT, if it exists and is allocated to
11783 REAL_PIC_OFFSET_TABLE_REGNUM. */
11784 ix86_elim_entry_set_got (pic);
11787 if (crtl->drap_reg && !crtl->stack_realign_needed)
11789 /* vDRAP is set up, but after reload it turns out stack realignment
11790 isn't necessary; here we emit prologue code to set up DRAP
11791 without the stack realignment adjustment. */
11792 t = choose_baseaddr (0);
11793 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11796 /* Prevent instructions from being scheduled into register save push
11797 sequence when access to the redzone area is done through frame pointer.
11798 The offset between the frame pointer and the stack pointer is calculated
11799 relative to the value of the stack pointer at the end of the function
11800 prologue, and moving instructions that access redzone area via frame
11801 pointer inside push sequence violates this assumption. */
11802 if (frame_pointer_needed && frame.red_zone_size)
11803 emit_insn (gen_memory_blockage ());
11805 /* Emit cld instruction if stringops are used in the function. */
11806 if (TARGET_CLD && ix86_current_function_needs_cld)
11807 emit_insn (gen_cld ());
11809 /* SEH requires that the prologue end within 256 bytes of the start of
11810 the function. Prevent instruction schedules that would extend that.
11811 Further, prevent alloca modifications to the stack pointer from being
11812 combined with prologue modifications. */
11813 if (TARGET_SEH)
11814 emit_insn (gen_prologue_use (stack_pointer_rtx));
11817 /* Emit code to restore REG using a POP insn. */
11819 static void
11820 ix86_emit_restore_reg_using_pop (rtx reg)
11822 struct machine_function *m = cfun->machine;
11823 rtx_insn *insn = emit_insn (gen_pop (reg));
11825 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11826 m->fs.sp_offset -= UNITS_PER_WORD;
11828 if (m->fs.cfa_reg == crtl->drap_reg
11829 && REGNO (reg) == REGNO (crtl->drap_reg))
11831 /* Previously we'd represented the CFA as an expression
11832 like *(%ebp - 8). We've just popped that value from
11833 the stack, which means we need to reset the CFA to
11834 the drap register. This will remain until we restore
11835 the stack pointer. */
11836 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11837 RTX_FRAME_RELATED_P (insn) = 1;
11839 /* This means that the DRAP register is valid for addressing too. */
11840 m->fs.drap_valid = true;
11841 return;
11844 if (m->fs.cfa_reg == stack_pointer_rtx)
11846 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11847 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11848 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11849 RTX_FRAME_RELATED_P (insn) = 1;
11851 m->fs.cfa_offset -= UNITS_PER_WORD;
11854 /* When the frame pointer is the CFA, and we pop it, we are
11855 swapping back to the stack pointer as the CFA. This happens
11856 for stack frames that don't allocate other data, so we assume
11857 the stack pointer is now pointing at the return address, i.e.
11858 the function entry state, which makes the offset one word. */
11859 if (reg == hard_frame_pointer_rtx)
11861 m->fs.fp_valid = false;
11862 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11864 m->fs.cfa_reg = stack_pointer_rtx;
11865 m->fs.cfa_offset -= UNITS_PER_WORD;
11867 add_reg_note (insn, REG_CFA_DEF_CFA,
11868 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11869 GEN_INT (m->fs.cfa_offset)));
11870 RTX_FRAME_RELATED_P (insn) = 1;
11875 /* Emit code to restore saved registers using POP insns. */
11877 static void
11878 ix86_emit_restore_regs_using_pop (void)
11880 unsigned int regno;
11882 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11883 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11884 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11887 /* Emit code and notes for the LEAVE instruction. */
11889 static void
11890 ix86_emit_leave (void)
11892 struct machine_function *m = cfun->machine;
11893 rtx_insn *insn = emit_insn (ix86_gen_leave ());
11895 ix86_add_queued_cfa_restore_notes (insn);
11897 gcc_assert (m->fs.fp_valid);
11898 m->fs.sp_valid = true;
11899 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11900 m->fs.fp_valid = false;
11902 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11904 m->fs.cfa_reg = stack_pointer_rtx;
11905 m->fs.cfa_offset = m->fs.sp_offset;
11907 add_reg_note (insn, REG_CFA_DEF_CFA,
11908 plus_constant (Pmode, stack_pointer_rtx,
11909 m->fs.sp_offset));
11910 RTX_FRAME_RELATED_P (insn) = 1;
11912 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11913 m->fs.fp_offset);
11916 /* Emit code to restore saved registers using MOV insns.
11917 First register is restored from CFA - CFA_OFFSET. */
11918 static void
11919 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11920 bool maybe_eh_return)
11922 struct machine_function *m = cfun->machine;
11923 unsigned int regno;
11925 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11926 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11928 rtx reg = gen_rtx_REG (word_mode, regno);
11929 rtx mem;
11930 rtx_insn *insn;
11932 mem = choose_baseaddr (cfa_offset);
11933 mem = gen_frame_mem (word_mode, mem);
11934 insn = emit_move_insn (reg, mem);
11936 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11938 /* Previously we'd represented the CFA as an expression
11939 like *(%ebp - 8). We've just restored that value from
11940 the stack, which means we need to reset the CFA to
11941 the drap register. This will remain until we restore
11942 the stack pointer. */
11943 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11944 RTX_FRAME_RELATED_P (insn) = 1;
11946 /* This means that the DRAP register is valid for addressing. */
11947 m->fs.drap_valid = true;
11949 else
11950 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
11952 cfa_offset -= UNITS_PER_WORD;
11956 /* Emit code to restore saved SSE registers using MOV insns.
11957 First register is restored from CFA - CFA_OFFSET. */
11958 static void
11959 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11960 bool maybe_eh_return)
11962 unsigned int regno;
11964 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11965 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11967 rtx reg = gen_rtx_REG (V4SFmode, regno);
11968 rtx mem;
11970 mem = choose_baseaddr (cfa_offset);
11971 mem = gen_rtx_MEM (V4SFmode, mem);
11972 set_mem_align (mem, 128);
11973 emit_move_insn (reg, mem);
11975 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
11977 cfa_offset -= 16;
11981 /* Restore function stack, frame, and registers. */
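/* Roughly, in the order implemented below: recompute the frame layout;
   decide whether registers are restored with moves or pops; restore SSE
   registers with moves; restore the integer registers (moves are required
   for eh_return, pops are required for SEH); release the frame pointer with
   a pop or leave; undo the DRAP adjustment if one was used; deallocate
   whatever is left of the frame; and emit the return, or nothing at all for
   a sibcall epilogue (STYLE == 0).  */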
11983 void
11984 ix86_expand_epilogue (int style)
11986 struct machine_function *m = cfun->machine;
11987 struct machine_frame_state frame_state_save = m->fs;
11988 struct ix86_frame frame;
11989 bool restore_regs_via_mov;
11990 bool using_drap;
11992 ix86_finalize_stack_realign_flags ();
11993 ix86_compute_frame_layout (&frame);
11995 m->fs.sp_valid = (!frame_pointer_needed
11996 || (crtl->sp_is_unchanging
11997 && !stack_realign_fp));
11998 gcc_assert (!m->fs.sp_valid
11999 || m->fs.sp_offset == frame.stack_pointer_offset);
12001 /* The FP must be valid if the frame pointer is present. */
12002 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
12003 gcc_assert (!m->fs.fp_valid
12004 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
12006 /* We must have *some* valid pointer to the stack frame. */
12007 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
12009 /* The DRAP is never valid at this point. */
12010 gcc_assert (!m->fs.drap_valid);
12012 /* See the comment about red zone and frame
12013 pointer usage in ix86_expand_prologue. */
12014 if (frame_pointer_needed && frame.red_zone_size)
12015 emit_insn (gen_memory_blockage ());
12017 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
12018 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
12020 /* Determine the CFA offset of the end of the red-zone. */
12021 m->fs.red_zone_offset = 0;
12022 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
12024 /* The red-zone begins below the return address. */
12025 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
12027 /* When the register save area is in the aligned portion of
12028 the stack, determine the maximum runtime displacement that
12029 matches up with the aligned frame. */
12030 if (stack_realign_drap)
12031 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
12032 + UNITS_PER_WORD);
12035 /* Special care must be taken for the normal return case of a function
12036 using eh_return: the eax and edx registers are marked as saved, but
12037 not restored along this path. Adjust the save location to match. */
12038 if (crtl->calls_eh_return && style != 2)
12039 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
12041 /* EH_RETURN requires the use of moves to function properly. */
12042 if (crtl->calls_eh_return)
12043 restore_regs_via_mov = true;
12044 /* SEH requires the use of pops to identify the epilogue. */
12045 else if (TARGET_SEH)
12046 restore_regs_via_mov = false;
12047 /* If we're only restoring one register and sp is not valid, then
12048 use a move instruction to restore the register, since it's
12049 less work than reloading sp and popping the register. */
12050 else if (!m->fs.sp_valid && frame.nregs <= 1)
12051 restore_regs_via_mov = true;
12052 else if (TARGET_EPILOGUE_USING_MOVE
12053 && cfun->machine->use_fast_prologue_epilogue
12054 && (frame.nregs > 1
12055 || m->fs.sp_offset != frame.reg_save_offset))
12056 restore_regs_via_mov = true;
12057 else if (frame_pointer_needed
12058 && !frame.nregs
12059 && m->fs.sp_offset != frame.reg_save_offset)
12060 restore_regs_via_mov = true;
12061 else if (frame_pointer_needed
12062 && TARGET_USE_LEAVE
12063 && cfun->machine->use_fast_prologue_epilogue
12064 && frame.nregs == 1)
12065 restore_regs_via_mov = true;
12066 else
12067 restore_regs_via_mov = false;
12069 if (restore_regs_via_mov || frame.nsseregs)
12071 /* Ensure that the entire register save area is addressable via
12072 the stack pointer, if we will restore via sp. */
12073 if (TARGET_64BIT
12074 && m->fs.sp_offset > 0x7fffffff
12075 && !(m->fs.fp_valid || m->fs.drap_valid)
12076 && (frame.nsseregs + frame.nregs) != 0)
12078 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12079 GEN_INT (m->fs.sp_offset
12080 - frame.sse_reg_save_offset),
12081 style,
12082 m->fs.cfa_reg == stack_pointer_rtx);
12086 /* If there are any SSE registers to restore, then we have to do it
12087 via moves, since there's obviously no pop for SSE regs. */
12088 if (frame.nsseregs)
12089 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
12090 style == 2);
12092 if (restore_regs_via_mov)
12094 rtx t;
12096 if (frame.nregs)
12097 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
12099 /* eh_return epilogues need %ecx added to the stack pointer. */
12100 if (style == 2)
12102 rtx sa = EH_RETURN_STACKADJ_RTX;
12103 rtx_insn *insn;
12105 /* Stack align doesn't work with eh_return. */
12106 gcc_assert (!stack_realign_drap);
12107 /* Neither do regparm nested functions. */
12108 gcc_assert (!ix86_static_chain_on_stack);
12110 if (frame_pointer_needed)
12112 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
12113 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
12114 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
12116 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
12117 insn = emit_move_insn (hard_frame_pointer_rtx, t);
12119 /* Note that we use SA as a temporary CFA, as the return
12120 address is at the proper place relative to it. We
12121 pretend this happens at the FP restore insn because
12122 prior to this insn the FP would be stored at the wrong
12123 offset relative to SA, and after this insn we have no
12124 other reasonable register to use for the CFA. We don't
12125 bother resetting the CFA to the SP for the duration of
12126 the return insn. */
12127 add_reg_note (insn, REG_CFA_DEF_CFA,
12128 plus_constant (Pmode, sa, UNITS_PER_WORD));
12129 ix86_add_queued_cfa_restore_notes (insn);
12130 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
12131 RTX_FRAME_RELATED_P (insn) = 1;
12133 m->fs.cfa_reg = sa;
12134 m->fs.cfa_offset = UNITS_PER_WORD;
12135 m->fs.fp_valid = false;
12137 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
12138 const0_rtx, style, false);
12140 else
12142 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
12143 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
12144 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
12145 ix86_add_queued_cfa_restore_notes (insn);
12147 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12148 if (m->fs.cfa_offset != UNITS_PER_WORD)
12150 m->fs.cfa_offset = UNITS_PER_WORD;
12151 add_reg_note (insn, REG_CFA_DEF_CFA,
12152 plus_constant (Pmode, stack_pointer_rtx,
12153 UNITS_PER_WORD));
12154 RTX_FRAME_RELATED_P (insn) = 1;
12157 m->fs.sp_offset = UNITS_PER_WORD;
12158 m->fs.sp_valid = true;
12161 else
12163 /* SEH requires that the function end with (1) a stack adjustment
12164 if necessary, (2) a sequence of pops, and (3) a return or
12165 jump instruction. Prevent insns from the function body from
12166 being scheduled into this sequence. */
12167 if (TARGET_SEH)
12169 /* Prevent a catch region from being adjacent to the standard
12170 epilogue sequence. Unfortunately crtl->uses_eh_lsda and
12171 several other flags that would be interesting to test are
12172 not yet set up. */
12173 if (flag_non_call_exceptions)
12174 emit_insn (gen_nops (const1_rtx));
12175 else
12176 emit_insn (gen_blockage ());
12179 /* The first step is to deallocate the stack frame so that we can
12180 pop the registers. Also do it on the SEH target for very large
12181 frames, as the emitted instructions aren't allowed by the ABI in
12182 epilogues. */
12183 if (!m->fs.sp_valid
12184 || (TARGET_SEH
12185 && (m->fs.sp_offset - frame.reg_save_offset
12186 >= SEH_MAX_FRAME_SIZE)))
12188 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12189 GEN_INT (m->fs.fp_offset
12190 - frame.reg_save_offset),
12191 style, false);
12193 else if (m->fs.sp_offset != frame.reg_save_offset)
12195 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12196 GEN_INT (m->fs.sp_offset
12197 - frame.reg_save_offset),
12198 style,
12199 m->fs.cfa_reg == stack_pointer_rtx);
12202 ix86_emit_restore_regs_using_pop ();
12205 /* If we used a frame pointer and haven't already got rid of it,
12206 then do so now. */
12207 if (m->fs.fp_valid)
12209 /* If the stack pointer is valid and pointing at the frame
12210 pointer store address, then we only need a pop. */
12211 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12212 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12213 /* Leave results in shorter dependency chains on CPUs that are
12214 able to grok it fast. */
12215 else if (TARGET_USE_LEAVE
12216 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12217 || !cfun->machine->use_fast_prologue_epilogue)
12218 ix86_emit_leave ();
12219 else
12221 pro_epilogue_adjust_stack (stack_pointer_rtx,
12222 hard_frame_pointer_rtx,
12223 const0_rtx, style, !using_drap);
12224 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12228 if (using_drap)
12230 int param_ptr_offset = UNITS_PER_WORD;
12231 rtx_insn *insn;
12233 gcc_assert (stack_realign_drap);
12235 if (ix86_static_chain_on_stack)
12236 param_ptr_offset += UNITS_PER_WORD;
12237 if (!call_used_regs[REGNO (crtl->drap_reg)])
12238 param_ptr_offset += UNITS_PER_WORD;
12240 insn = emit_insn (gen_rtx_SET
12241 (VOIDmode, stack_pointer_rtx,
12242 gen_rtx_PLUS (Pmode,
12243 crtl->drap_reg,
12244 GEN_INT (-param_ptr_offset))));
12245 m->fs.cfa_reg = stack_pointer_rtx;
12246 m->fs.cfa_offset = param_ptr_offset;
12247 m->fs.sp_offset = param_ptr_offset;
12248 m->fs.realigned = false;
12250 add_reg_note (insn, REG_CFA_DEF_CFA,
12251 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12252 GEN_INT (param_ptr_offset)));
12253 RTX_FRAME_RELATED_P (insn) = 1;
12255 if (!call_used_regs[REGNO (crtl->drap_reg)])
12256 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12259 /* At this point the stack pointer must be valid, and we must have
12260 restored all of the registers. We may not have deallocated the
12261 entire stack frame. We've delayed this until now because it may
12262 be possible to merge the local stack deallocation with the
12263 deallocation forced by ix86_static_chain_on_stack. */
12264 gcc_assert (m->fs.sp_valid);
12265 gcc_assert (!m->fs.fp_valid);
12266 gcc_assert (!m->fs.realigned);
12267 if (m->fs.sp_offset != UNITS_PER_WORD)
12269 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12270 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12271 style, true);
12273 else
12274 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12276 /* Sibcall epilogues don't want a return instruction. */
12277 if (style == 0)
12279 m->fs = frame_state_save;
12280 return;
12283 if (crtl->args.pops_args && crtl->args.size)
12285 rtx popc = GEN_INT (crtl->args.pops_args);
12287 /* i386 can only pop 64K bytes. If asked to pop more, pop return
12288 address, do explicit add, and jump indirectly to the caller. */
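/* Editorial sketch: for pops_args >= 64K the code below effectively emits

       popl  %ecx              # pop the return address
       addl  $pops_args, %esp  # discard the pushed arguments
       jmp   *%ecx             # return to the caller

   instead of a single "ret $pops_args", whose immediate operand is only
   16 bits wide.  The mnemonics are illustrative; the actual insns come
   from gen_pop, pro_epilogue_adjust_stack and
   gen_simple_return_indirect_internal below.  */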
12290 if (crtl->args.pops_args >= 65536)
12292 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12293 rtx_insn *insn;
12295 /* There is no "pascal" calling convention in any 64bit ABI. */
12296 gcc_assert (!TARGET_64BIT);
12298 insn = emit_insn (gen_pop (ecx));
12299 m->fs.cfa_offset -= UNITS_PER_WORD;
12300 m->fs.sp_offset -= UNITS_PER_WORD;
12302 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12303 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
12304 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12305 add_reg_note (insn, REG_CFA_REGISTER,
12306 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
12307 RTX_FRAME_RELATED_P (insn) = 1;
12309 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12310 popc, -1, true);
12311 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12313 else
12314 emit_jump_insn (gen_simple_return_pop_internal (popc));
12316 else
12317 emit_jump_insn (gen_simple_return_internal ());
12319 /* Restore the state back to the state from the prologue,
12320 so that it's correct for the next epilogue. */
12321 m->fs = frame_state_save;
12324 /* Reset from the function's potential modifications. */
12326 static void
12327 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12329 if (pic_offset_table_rtx
12330 && !ix86_use_pseudo_pic_reg ())
12331 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12332 #if TARGET_MACHO
12333 /* Mach-O doesn't support labels at the end of objects, so if
12334 it looks like we might want one, insert a NOP. */
12336 rtx_insn *insn = get_last_insn ();
12337 rtx_insn *deleted_debug_label = NULL;
12338 while (insn
12339 && NOTE_P (insn)
12340 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12342 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
12343 notes only, instead set their CODE_LABEL_NUMBER to -1,
12344 otherwise there would be code generation differences
12345 in between -g and -g0. */
12346 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12347 deleted_debug_label = insn;
12348 insn = PREV_INSN (insn);
12350 if (insn
12351 && (LABEL_P (insn)
12352 || (NOTE_P (insn)
12353 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12354 fputs ("\tnop\n", file);
12355 else if (deleted_debug_label)
12356 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12357 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12358 CODE_LABEL_NUMBER (insn) = -1;
12360 #endif
12364 /* Return a scratch register to use in the split stack prologue. The
12365 split stack prologue is used for -fsplit-stack. It consists of the
12366 first instructions in the function, even before the regular prologue.
12367 The scratch register can be any caller-saved register which is not
12368 used for parameters or for the static chain. */
12370 static unsigned int
12371 split_stack_prologue_scratch_regno (void)
12373 if (TARGET_64BIT)
12374 return R11_REG;
12375 else
12377 bool is_fastcall, is_thiscall;
12378 int regparm;
12380 is_fastcall = (lookup_attribute ("fastcall",
12381 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12382 != NULL);
12383 is_thiscall = (lookup_attribute ("thiscall",
12384 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12385 != NULL);
12386 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12388 if (is_fastcall)
12390 if (DECL_STATIC_CHAIN (cfun->decl))
12392 sorry ("-fsplit-stack does not support fastcall with "
12393 "nested function");
12394 return INVALID_REGNUM;
12396 return AX_REG;
12398 else if (is_thiscall)
12400 if (!DECL_STATIC_CHAIN (cfun->decl))
12401 return DX_REG;
12402 return AX_REG;
12404 else if (regparm < 3)
12406 if (!DECL_STATIC_CHAIN (cfun->decl))
12407 return CX_REG;
12408 else
12410 if (regparm >= 2)
12412 sorry ("-fsplit-stack does not support 2 register "
12413 "parameters for a nested function");
12414 return INVALID_REGNUM;
12416 return DX_REG;
12419 else
12421 /* FIXME: We could make this work by pushing a register
12422 around the addition and comparison. */
12423 sorry ("-fsplit-stack does not support 3 register parameters");
12424 return INVALID_REGNUM;
12429 /* A SYMBOL_REF for the function which allocates new stack space for
12430 -fsplit-stack. */
12432 static GTY(()) rtx split_stack_fn;
12434 /* A SYMBOL_REF for the function which allocates more stack when using
12435 the large model. */
12437 static GTY(()) rtx split_stack_fn_large;
12439 /* Handle -fsplit-stack. These are the first instructions in the
12440 function, even before the regular prologue. */
12442 void
12443 ix86_expand_split_stack_prologue (void)
12445 struct ix86_frame frame;
12446 HOST_WIDE_INT allocate;
12447 unsigned HOST_WIDE_INT args_size;
12448 rtx_code_label *label;
12449 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12450 rtx scratch_reg = NULL_RTX;
12451 rtx_code_label *varargs_label = NULL;
12452 rtx fn;
12454 gcc_assert (flag_split_stack && reload_completed);
12456 ix86_finalize_stack_realign_flags ();
12457 ix86_compute_frame_layout (&frame);
12458 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12460 /* This is the label we will branch to if we have enough stack
12461 space. We expect the basic block reordering pass to reverse this
12462 branch if optimizing, so that we branch in the unlikely case. */
12463 label = gen_label_rtx ();
12465 /* We need to compare the stack pointer minus the frame size with
12466 the stack boundary in the TCB. The stack boundary always gives
12467 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12468 can compare directly. Otherwise we need to do an addition. */
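/* Editorial sketch of the typical result on x86-64 for a small frame
   (the guard offset in the TCB is an assumption that depends on the
   libc/linker convention, not something fixed here):

       cmpq  %fs:<guard_offset>, %rsp
       jae   .Lenough_stack
       movq  $allocate, %r10
       movq  $args_size, %r11
       callq __morestack
       retq
   .Lenough_stack:
       ...regular prologue...  */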
12470 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12471 UNSPEC_STACK_CHECK);
12472 limit = gen_rtx_CONST (Pmode, limit);
12473 limit = gen_rtx_MEM (Pmode, limit);
12474 if (allocate < SPLIT_STACK_AVAILABLE)
12475 current = stack_pointer_rtx;
12476 else
12478 unsigned int scratch_regno;
12479 rtx offset;
12481 /* We need a scratch register to hold the stack pointer minus
12482 the required frame size. Since this is the very start of the
12483 function, the scratch register can be any caller-saved
12484 register which is not used for parameters. */
12485 offset = GEN_INT (- allocate);
12486 scratch_regno = split_stack_prologue_scratch_regno ();
12487 if (scratch_regno == INVALID_REGNUM)
12488 return;
12489 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12490 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12492 /* We don't use ix86_gen_add3 in this case because it will
12493 want to split to lea, but when not optimizing the insn
12494 will not be split after this point. */
12495 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12496 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12497 offset)));
12499 else
12501 emit_move_insn (scratch_reg, offset);
12502 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12503 stack_pointer_rtx));
12505 current = scratch_reg;
12508 ix86_expand_branch (GEU, current, limit, label);
12509 jump_insn = get_last_insn ();
12510 JUMP_LABEL (jump_insn) = label;
12512 /* Mark the jump as very likely to be taken. */
12513 add_int_reg_note (jump_insn, REG_BR_PROB,
12514 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12516 if (split_stack_fn == NULL_RTX)
12518 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12519 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12521 fn = split_stack_fn;
12523 /* Get more stack space. We pass in the desired stack space and the
12524 size of the arguments to copy to the new stack. In 32-bit mode
12525 we push the parameters; __morestack will return on a new stack
12526 anyhow. In 64-bit mode we pass the parameters in r10 and
12527 r11. */
12528 allocate_rtx = GEN_INT (allocate);
12529 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12530 call_fusage = NULL_RTX;
12531 if (TARGET_64BIT)
12533 rtx reg10, reg11;
12535 reg10 = gen_rtx_REG (Pmode, R10_REG);
12536 reg11 = gen_rtx_REG (Pmode, R11_REG);
12538 /* If this function uses a static chain, it will be in %r10.
12539 Preserve it across the call to __morestack. */
12540 if (DECL_STATIC_CHAIN (cfun->decl))
12542 rtx rax;
12544 rax = gen_rtx_REG (word_mode, AX_REG);
12545 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12546 use_reg (&call_fusage, rax);
12549 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12550 && !TARGET_PECOFF)
12552 HOST_WIDE_INT argval;
12554 gcc_assert (Pmode == DImode);
12555 /* When using the large model we need to load the address
12556 into a register, and we've run out of registers. So we
12557 switch to a different calling convention, and we call a
12558 different function: __morestack_large_model. We pass the
12559 argument size in the upper 32 bits of r10 and pass the
12560 frame size in the lower 32 bits. */
12561 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12562 gcc_assert ((args_size & 0xffffffff) == args_size);
12564 if (split_stack_fn_large == NULL_RTX)
12566 split_stack_fn_large =
12567 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12568 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12570 if (ix86_cmodel == CM_LARGE_PIC)
12572 rtx_code_label *label;
12573 rtx x;
12575 label = gen_label_rtx ();
12576 emit_label (label);
12577 LABEL_PRESERVE_P (label) = 1;
12578 emit_insn (gen_set_rip_rex64 (reg10, label));
12579 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12580 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12581 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12582 UNSPEC_GOT);
12583 x = gen_rtx_CONST (Pmode, x);
12584 emit_move_insn (reg11, x);
12585 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12586 x = gen_const_mem (Pmode, x);
12587 emit_move_insn (reg11, x);
12589 else
12590 emit_move_insn (reg11, split_stack_fn_large);
12592 fn = reg11;
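/* The assignment below packs both values into one register: args_size
   goes in the upper 32 bits and allocate in the lower 32 bits.  With
   illustrative numbers, args_size == 0x18 and allocate == 0x200 give
   r10 == 0x0000001800000200.  The shift is written as two 16-bit shifts,
   presumably so the expression stays well defined even if HOST_WIDE_INT
   is only 32 bits wide.  */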
12594 argval = ((args_size << 16) << 16) + allocate;
12595 emit_move_insn (reg10, GEN_INT (argval));
12597 else
12599 emit_move_insn (reg10, allocate_rtx);
12600 emit_move_insn (reg11, GEN_INT (args_size));
12601 use_reg (&call_fusage, reg11);
12604 use_reg (&call_fusage, reg10);
12606 else
12608 emit_insn (gen_push (GEN_INT (args_size)));
12609 emit_insn (gen_push (allocate_rtx));
12611 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12612 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12613 NULL_RTX, false);
12614 add_function_usage_to (call_insn, call_fusage);
12616 /* In order to make call/return prediction work right, we now need
12617 to execute a return instruction. See
12618 libgcc/config/i386/morestack.S for the details on how this works.
12620 For flow purposes gcc must not see this as a return
12621 instruction--we need control flow to continue at the subsequent
12622 label. Therefore, we use an unspec. */
12623 gcc_assert (crtl->args.pops_args < 65536);
12624 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12626 /* If we are in 64-bit mode and this function uses a static chain,
12627 we saved %r10 in %rax before calling __morestack. */
12628 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12629 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12630 gen_rtx_REG (word_mode, AX_REG));
12632 /* If this function calls va_start, we need to store a pointer to
12633 the arguments on the old stack, because they may not have been
12634 all copied to the new stack. At this point the old stack can be
12635 found at the frame pointer value used by __morestack, because
12636 __morestack has set that up before calling back to us. Here we
12637 store that pointer in a scratch register, and in
12638 ix86_expand_prologue we store the scratch register in a stack
12639 slot. */
12640 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12642 unsigned int scratch_regno;
12643 rtx frame_reg;
12644 int words;
12646 scratch_regno = split_stack_prologue_scratch_regno ();
12647 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12648 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12650 /* 64-bit:
12651 fp -> old fp value
12652 return address within this function
12653 return address of caller of this function
12654 stack arguments
12655 So we add three words to get to the stack arguments.
12657 32-bit:
12658 fp -> old fp value
12659 return address within this function
12660 first argument to __morestack
12661 second argument to __morestack
12662 return address of caller of this function
12663 stack arguments
12664 So we add five words to get to the stack arguments.
12666 words = TARGET_64BIT ? 3 : 5;
12667 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12668 gen_rtx_PLUS (Pmode, frame_reg,
12669 GEN_INT (words * UNITS_PER_WORD))));
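/* Editorial note: given the layouts above, the scratch register ends up
   holding fp + 3*8 == fp + 24 in 64-bit mode and fp + 5*4 == fp + 20 in
   32-bit mode, i.e. the address of the first stack argument.  */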
12671 varargs_label = gen_label_rtx ();
12672 emit_jump_insn (gen_jump (varargs_label));
12673 JUMP_LABEL (get_last_insn ()) = varargs_label;
12675 emit_barrier ();
12678 emit_label (label);
12679 LABEL_NUSES (label) = 1;
12681 /* If this function calls va_start, we now have to set the scratch
12682 register for the case where we do not call __morestack. In this
12683 case we need to set it based on the stack pointer. */
12684 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12686 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12687 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12688 GEN_INT (UNITS_PER_WORD))));
12690 emit_label (varargs_label);
12691 LABEL_NUSES (varargs_label) = 1;
12695 /* We may have to tell the dataflow pass that the split stack prologue
12696 is initializing a scratch register. */
12698 static void
12699 ix86_live_on_entry (bitmap regs)
12701 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12703 gcc_assert (flag_split_stack);
12704 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12708 /* Extract the parts of an RTL expression that is a valid memory address
12709 for an instruction. Return 0 if the structure of the address is
12710 grossly off. Return -1 if the address contains ASHIFT, so it is not
12711 strictly valid, but is still used for computing the length of an lea instruction. */
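/* For example (an editorial illustration, not an exhaustive list), the
   address

       (plus:SI (plus:SI (mult:SI (reg:SI %ecx) (const_int 4))
                         (reg:SI %ebx))
                (const_int 12))

   decomposes into out->base = %ebx, out->index = %ecx, out->scale = 4,
   out->disp = (const_int 12) and out->seg = SEG_DEFAULT, which prints as
   12(%ebx,%ecx,4).  */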
12714 ix86_decompose_address (rtx addr, struct ix86_address *out)
12716 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12717 rtx base_reg, index_reg;
12718 HOST_WIDE_INT scale = 1;
12719 rtx scale_rtx = NULL_RTX;
12720 rtx tmp;
12721 int retval = 1;
12722 enum ix86_address_seg seg = SEG_DEFAULT;
12724 /* Allow zero-extended SImode addresses,
12725 they will be emitted with addr32 prefix. */
12726 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12728 if (GET_CODE (addr) == ZERO_EXTEND
12729 && GET_MODE (XEXP (addr, 0)) == SImode)
12731 addr = XEXP (addr, 0);
12732 if (CONST_INT_P (addr))
12733 return 0;
12735 else if (GET_CODE (addr) == AND
12736 && const_32bit_mask (XEXP (addr, 1), DImode))
12738 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12739 if (addr == NULL_RTX)
12740 return 0;
12742 if (CONST_INT_P (addr))
12743 return 0;
12747 /* Allow SImode subregs of DImode addresses,
12748 they will be emitted with addr32 prefix. */
12749 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12751 if (GET_CODE (addr) == SUBREG
12752 && GET_MODE (SUBREG_REG (addr)) == DImode)
12754 addr = SUBREG_REG (addr);
12755 if (CONST_INT_P (addr))
12756 return 0;
12760 if (REG_P (addr))
12761 base = addr;
12762 else if (GET_CODE (addr) == SUBREG)
12764 if (REG_P (SUBREG_REG (addr)))
12765 base = addr;
12766 else
12767 return 0;
12769 else if (GET_CODE (addr) == PLUS)
12771 rtx addends[4], op;
12772 int n = 0, i;
12774 op = addr;
12777 if (n >= 4)
12778 return 0;
12779 addends[n++] = XEXP (op, 1);
12780 op = XEXP (op, 0);
12782 while (GET_CODE (op) == PLUS);
12783 if (n >= 4)
12784 return 0;
12785 addends[n] = op;
12787 for (i = n; i >= 0; --i)
12789 op = addends[i];
12790 switch (GET_CODE (op))
12792 case MULT:
12793 if (index)
12794 return 0;
12795 index = XEXP (op, 0);
12796 scale_rtx = XEXP (op, 1);
12797 break;
12799 case ASHIFT:
12800 if (index)
12801 return 0;
12802 index = XEXP (op, 0);
12803 tmp = XEXP (op, 1);
12804 if (!CONST_INT_P (tmp))
12805 return 0;
12806 scale = INTVAL (tmp);
12807 if ((unsigned HOST_WIDE_INT) scale > 3)
12808 return 0;
12809 scale = 1 << scale;
12810 break;
12812 case ZERO_EXTEND:
12813 op = XEXP (op, 0);
12814 if (GET_CODE (op) != UNSPEC)
12815 return 0;
12816 /* FALLTHRU */
12818 case UNSPEC:
12819 if (XINT (op, 1) == UNSPEC_TP
12820 && TARGET_TLS_DIRECT_SEG_REFS
12821 && seg == SEG_DEFAULT)
12822 seg = DEFAULT_TLS_SEG_REG;
12823 else
12824 return 0;
12825 break;
12827 case SUBREG:
12828 if (!REG_P (SUBREG_REG (op)))
12829 return 0;
12830 /* FALLTHRU */
12832 case REG:
12833 if (!base)
12834 base = op;
12835 else if (!index)
12836 index = op;
12837 else
12838 return 0;
12839 break;
12841 case CONST:
12842 case CONST_INT:
12843 case SYMBOL_REF:
12844 case LABEL_REF:
12845 if (disp)
12846 return 0;
12847 disp = op;
12848 break;
12850 default:
12851 return 0;
12855 else if (GET_CODE (addr) == MULT)
12857 index = XEXP (addr, 0); /* index*scale */
12858 scale_rtx = XEXP (addr, 1);
12860 else if (GET_CODE (addr) == ASHIFT)
12862 /* We're called for lea too, which implements ashift on occasion. */
12863 index = XEXP (addr, 0);
12864 tmp = XEXP (addr, 1);
12865 if (!CONST_INT_P (tmp))
12866 return 0;
12867 scale = INTVAL (tmp);
12868 if ((unsigned HOST_WIDE_INT) scale > 3)
12869 return 0;
12870 scale = 1 << scale;
12871 retval = -1;
12873 else
12874 disp = addr; /* displacement */
12876 if (index)
12878 if (REG_P (index))
12880 else if (GET_CODE (index) == SUBREG
12881 && REG_P (SUBREG_REG (index)))
12883 else
12884 return 0;
12887 /* Extract the integral value of scale. */
12888 if (scale_rtx)
12890 if (!CONST_INT_P (scale_rtx))
12891 return 0;
12892 scale = INTVAL (scale_rtx);
12895 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12896 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12898 /* Avoid useless 0 displacement. */
12899 if (disp == const0_rtx && (base || index))
12900 disp = NULL_RTX;
12902 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12903 if (base_reg && index_reg && scale == 1
12904 && (index_reg == arg_pointer_rtx
12905 || index_reg == frame_pointer_rtx
12906 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12908 std::swap (base, index);
12909 std::swap (base_reg, index_reg);
12912 /* Special case: %ebp cannot be encoded as a base without a displacement.
12913 Similarly %r13. */
12914 if (!disp
12915 && base_reg
12916 && (base_reg == hard_frame_pointer_rtx
12917 || base_reg == frame_pointer_rtx
12918 || base_reg == arg_pointer_rtx
12919 || (REG_P (base_reg)
12920 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12921 || REGNO (base_reg) == R13_REG))))
12922 disp = const0_rtx;
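/* Editorial note: in the ModR/M encoding, mod == 00 with a base of
   %ebp/%r13 actually means "disp32, no base", so (%ebp) can only be
   expressed as 0(%ebp) with an explicit zero displacement; forcing disp
   to const0_rtx here makes the output routines emit that form.  */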
12924 /* Special case: on K6, [%esi] makes the instruction vector decoded.
12925 Avoid this by transforming to [%esi+0].
12926 Reload calls address legitimization without cfun defined, so we need
12927 to test cfun for being non-NULL. */
12928 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12929 && base_reg && !index_reg && !disp
12930 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12931 disp = const0_rtx;
12933 /* Special case: encode reg+reg instead of reg*2. */
12934 if (!base && index && scale == 2)
12935 base = index, base_reg = index_reg, scale = 1;
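/* Editorial note: a scaled index without a base uses the SIB "no base"
   encoding, which always carries a four byte displacement, so e.g.
   "lea 0x0(,%eax,2), %edx" is longer than the equivalent
   "lea (%eax,%eax,1), %edx" produced by the transformation above.  */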
12937 /* Special case: scaling cannot be encoded without base or displacement. */
12938 if (!base && !disp && index && scale != 1)
12939 disp = const0_rtx;
12941 out->base = base;
12942 out->index = index;
12943 out->disp = disp;
12944 out->scale = scale;
12945 out->seg = seg;
12947 return retval;
12950 /* Return cost of the memory address x.
12951 For i386, it is better to use a complex address than let gcc copy
12952 the address into a reg and make a new pseudo. But not if the address
12953 requires two regs - that would mean more pseudos with longer
12954 lifetimes. */
12955 static int
12956 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
12958 struct ix86_address parts;
12959 int cost = 1;
12960 int ok = ix86_decompose_address (x, &parts);
12962 gcc_assert (ok);
12964 if (parts.base && GET_CODE (parts.base) == SUBREG)
12965 parts.base = SUBREG_REG (parts.base);
12966 if (parts.index && GET_CODE (parts.index) == SUBREG)
12967 parts.index = SUBREG_REG (parts.index);
12969 /* Attempt to minimize number of registers in the address by increasing
12970 address cost for each used register. We don't increase address cost
12971 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
12972 is not invariant itself it most likely means that base or index is not
12973 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
12974 which is not profitable for x86. */
12975 if (parts.base
12976 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12977 && (current_pass->type == GIMPLE_PASS
12978 || !pic_offset_table_rtx
12979 || !REG_P (parts.base)
12980 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
12981 cost++;
12983 if (parts.index
12984 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12985 && (current_pass->type == GIMPLE_PASS
12986 || !pic_offset_table_rtx
12987 || !REG_P (parts.index)
12988 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
12989 cost++;
12991 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12992 since its predecode logic can't detect the length of instructions
12993 and decoding degenerates to vector decode. Increase the cost of such
12994 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12995 to split such addresses or even refuse such addresses at all.
12997 Following addressing modes are affected:
12998 [base+scale*index]
12999 [scale*index+disp]
13000 [base+index]
13002 The first and last case may be avoidable by explicitly coding the zero in
13003 the memory address, but I don't have an AMD-K6 machine handy to check this
13004 theory. */
13006 if (TARGET_K6
13007 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
13008 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
13009 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
13010 cost += 10;
13012 return cost;
13015 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
13016 this is used to form addresses to local data when -fPIC is in
13017 use. */
13019 static bool
13020 darwin_local_data_pic (rtx disp)
13022 return (GET_CODE (disp) == UNSPEC
13023 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
13026 /* Determine if a given RTX is a valid constant. We already know this
13027 satisfies CONSTANT_P. */
13029 static bool
13030 ix86_legitimate_constant_p (machine_mode, rtx x)
13032 /* Pointer bounds constants are not valid. */
13033 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
13034 return false;
13036 switch (GET_CODE (x))
13038 case CONST:
13039 x = XEXP (x, 0);
13041 if (GET_CODE (x) == PLUS)
13043 if (!CONST_INT_P (XEXP (x, 1)))
13044 return false;
13045 x = XEXP (x, 0);
13048 if (TARGET_MACHO && darwin_local_data_pic (x))
13049 return true;
13051 /* Only some unspecs are valid as "constants". */
13052 if (GET_CODE (x) == UNSPEC)
13053 switch (XINT (x, 1))
13055 case UNSPEC_GOT:
13056 case UNSPEC_GOTOFF:
13057 case UNSPEC_PLTOFF:
13058 return TARGET_64BIT;
13059 case UNSPEC_TPOFF:
13060 case UNSPEC_NTPOFF:
13061 x = XVECEXP (x, 0, 0);
13062 return (GET_CODE (x) == SYMBOL_REF
13063 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13064 case UNSPEC_DTPOFF:
13065 x = XVECEXP (x, 0, 0);
13066 return (GET_CODE (x) == SYMBOL_REF
13067 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
13068 default:
13069 return false;
13072 /* We must have drilled down to a symbol. */
13073 if (GET_CODE (x) == LABEL_REF)
13074 return true;
13075 if (GET_CODE (x) != SYMBOL_REF)
13076 return false;
13077 /* FALLTHRU */
13079 case SYMBOL_REF:
13080 /* TLS symbols are never valid. */
13081 if (SYMBOL_REF_TLS_MODEL (x))
13082 return false;
13084 /* DLLIMPORT symbols are never valid. */
13085 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13086 && SYMBOL_REF_DLLIMPORT_P (x))
13087 return false;
13089 #if TARGET_MACHO
13090 /* mdynamic-no-pic */
13091 if (MACHO_DYNAMIC_NO_PIC_P)
13092 return machopic_symbol_defined_p (x);
13093 #endif
13094 break;
13096 case CONST_WIDE_INT:
13097 if (!TARGET_64BIT && !standard_sse_constant_p (x))
13098 return false;
13099 break;
13101 case CONST_VECTOR:
13102 if (!standard_sse_constant_p (x))
13103 return false;
13105 default:
13106 break;
13109 /* Otherwise we handle everything else in the move patterns. */
13110 return true;
13113 /* Determine if it's legal to put X into the constant pool. This
13114 is not possible for the address of thread-local symbols, which
13115 is checked above. */
13117 static bool
13118 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
13120 /* We can always put integral constants and vectors in memory. */
13121 switch (GET_CODE (x))
13123 case CONST_INT:
13124 case CONST_WIDE_INT:
13125 case CONST_DOUBLE:
13126 case CONST_VECTOR:
13127 return false;
13129 default:
13130 break;
13132 return !ix86_legitimate_constant_p (mode, x);
13135 /* Nonzero if the symbol is marked as dllimport, or as a stub variable;
13136 otherwise zero. */
13138 static bool
13139 is_imported_p (rtx x)
13141 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
13142 || GET_CODE (x) != SYMBOL_REF)
13143 return false;
13145 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13149 /* Nonzero if the constant value X is a legitimate general operand
13150 when generating PIC code. It is given that flag_pic is on and
13151 that X satisfies CONSTANT_P. */
13153 bool
13154 legitimate_pic_operand_p (rtx x)
13156 rtx inner;
13158 switch (GET_CODE (x))
13160 case CONST:
13161 inner = XEXP (x, 0);
13162 if (GET_CODE (inner) == PLUS
13163 && CONST_INT_P (XEXP (inner, 1)))
13164 inner = XEXP (inner, 0);
13166 /* Only some unspecs are valid as "constants". */
13167 if (GET_CODE (inner) == UNSPEC)
13168 switch (XINT (inner, 1))
13170 case UNSPEC_GOT:
13171 case UNSPEC_GOTOFF:
13172 case UNSPEC_PLTOFF:
13173 return TARGET_64BIT;
13174 case UNSPEC_TPOFF:
13175 x = XVECEXP (inner, 0, 0);
13176 return (GET_CODE (x) == SYMBOL_REF
13177 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13178 case UNSPEC_MACHOPIC_OFFSET:
13179 return legitimate_pic_address_disp_p (x);
13180 default:
13181 return false;
13183 /* FALLTHRU */
13185 case SYMBOL_REF:
13186 case LABEL_REF:
13187 return legitimate_pic_address_disp_p (x);
13189 default:
13190 return true;
13194 /* Determine if a given CONST RTX is a valid memory displacement
13195 in PIC mode. */
13197 bool
13198 legitimate_pic_address_disp_p (rtx disp)
13200 bool saw_plus;
13202 /* In 64bit mode we can allow direct addresses of symbols and labels
13203 when they are not dynamic symbols. */
13204 if (TARGET_64BIT)
13206 rtx op0 = disp, op1;
13208 switch (GET_CODE (disp))
13210 case LABEL_REF:
13211 return true;
13213 case CONST:
13214 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13215 break;
13216 op0 = XEXP (XEXP (disp, 0), 0);
13217 op1 = XEXP (XEXP (disp, 0), 1);
13218 if (!CONST_INT_P (op1)
13219 || INTVAL (op1) >= 16*1024*1024
13220 || INTVAL (op1) < -16*1024*1024)
13221 break;
13222 if (GET_CODE (op0) == LABEL_REF)
13223 return true;
13224 if (GET_CODE (op0) == CONST
13225 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13226 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13227 return true;
13228 if (GET_CODE (op0) == UNSPEC
13229 && XINT (op0, 1) == UNSPEC_PCREL)
13230 return true;
13231 if (GET_CODE (op0) != SYMBOL_REF)
13232 break;
13233 /* FALLTHRU */
13235 case SYMBOL_REF:
13236 /* TLS references should always be enclosed in UNSPEC.
13237 The dllimported symbol always needs to be resolved. */
13238 if (SYMBOL_REF_TLS_MODEL (op0)
13239 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13240 return false;
13242 if (TARGET_PECOFF)
13244 if (is_imported_p (op0))
13245 return true;
13247 if (SYMBOL_REF_FAR_ADDR_P (op0)
13248 || !SYMBOL_REF_LOCAL_P (op0))
13249 break;
13251 /* Function symbols need to be resolved only for
13252 the large model.
13253 For the small model we don't need to resolve anything
13254 here. */
13255 if ((ix86_cmodel != CM_LARGE_PIC
13256 && SYMBOL_REF_FUNCTION_P (op0))
13257 || ix86_cmodel == CM_SMALL_PIC)
13258 return true;
13259 /* Non-external symbols don't need to be resolved for
13260 the large and medium models. */
13261 if ((ix86_cmodel == CM_LARGE_PIC
13262 || ix86_cmodel == CM_MEDIUM_PIC)
13263 && !SYMBOL_REF_EXTERNAL_P (op0))
13264 return true;
13266 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13267 && (SYMBOL_REF_LOCAL_P (op0)
13268 || (HAVE_LD_PIE_COPYRELOC
13269 && flag_pie
13270 && !SYMBOL_REF_WEAK (op0)
13271 && !SYMBOL_REF_FUNCTION_P (op0)))
13272 && ix86_cmodel != CM_LARGE_PIC)
13273 return true;
13274 break;
13276 default:
13277 break;
13280 if (GET_CODE (disp) != CONST)
13281 return false;
13282 disp = XEXP (disp, 0);
13284 if (TARGET_64BIT)
13286 /* It is unsafe to allow PLUS expressions here; this limits the allowed
13287 distance of GOT references. We should not need these anyway. */
13288 if (GET_CODE (disp) != UNSPEC
13289 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13290 && XINT (disp, 1) != UNSPEC_GOTOFF
13291 && XINT (disp, 1) != UNSPEC_PCREL
13292 && XINT (disp, 1) != UNSPEC_PLTOFF))
13293 return false;
13295 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13296 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13297 return false;
13298 return true;
13301 saw_plus = false;
13302 if (GET_CODE (disp) == PLUS)
13304 if (!CONST_INT_P (XEXP (disp, 1)))
13305 return false;
13306 disp = XEXP (disp, 0);
13307 saw_plus = true;
13310 if (TARGET_MACHO && darwin_local_data_pic (disp))
13311 return true;
13313 if (GET_CODE (disp) != UNSPEC)
13314 return false;
13316 switch (XINT (disp, 1))
13318 case UNSPEC_GOT:
13319 if (saw_plus)
13320 return false;
13321 /* We need to check for both symbols and labels because VxWorks loads
13322 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13323 details. */
13324 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13325 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13326 case UNSPEC_GOTOFF:
13327 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13328 While the ABI also specifies a 32bit relocation, we don't produce it in
13329 the small PIC model at all. */
13330 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13331 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13332 && !TARGET_64BIT)
13333 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13334 return false;
13335 case UNSPEC_GOTTPOFF:
13336 case UNSPEC_GOTNTPOFF:
13337 case UNSPEC_INDNTPOFF:
13338 if (saw_plus)
13339 return false;
13340 disp = XVECEXP (disp, 0, 0);
13341 return (GET_CODE (disp) == SYMBOL_REF
13342 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13343 case UNSPEC_NTPOFF:
13344 disp = XVECEXP (disp, 0, 0);
13345 return (GET_CODE (disp) == SYMBOL_REF
13346 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13347 case UNSPEC_DTPOFF:
13348 disp = XVECEXP (disp, 0, 0);
13349 return (GET_CODE (disp) == SYMBOL_REF
13350 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13353 return false;
13356 /* Determine if OP is a suitable RTX for an address register.
13357 Return the naked register if a register or a register subreg is
13358 found, otherwise return NULL_RTX. */
13360 static rtx
13361 ix86_validate_address_register (rtx op)
13363 machine_mode mode = GET_MODE (op);
13365 /* Only SImode or DImode registers can form the address. */
13366 if (mode != SImode && mode != DImode)
13367 return NULL_RTX;
13369 if (REG_P (op))
13370 return op;
13371 else if (GET_CODE (op) == SUBREG)
13373 rtx reg = SUBREG_REG (op);
13375 if (!REG_P (reg))
13376 return NULL_RTX;
13378 mode = GET_MODE (reg);
13380 /* Don't allow SUBREGs that span more than a word. It can
13381 lead to spill failures when the register is one word out
13382 of a two word structure. */
13383 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13384 return NULL_RTX;
13386 /* Allow only SUBREGs of non-eliminable hard registers. */
13387 if (register_no_elim_operand (reg, mode))
13388 return reg;
13391 /* Op is not a register. */
13392 return NULL_RTX;
13395 /* Recognizes RTL expressions that are valid memory addresses for an
13396 instruction. The MODE argument is the machine mode for the MEM
13397 expression that wants to use this address.
13399 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13400 convert common non-canonical forms to canonical form so that they will
13401 be recognized. */
13403 static bool
13404 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13406 struct ix86_address parts;
13407 rtx base, index, disp;
13408 HOST_WIDE_INT scale;
13409 enum ix86_address_seg seg;
13411 if (ix86_decompose_address (addr, &parts) <= 0)
13412 /* Decomposition failed. */
13413 return false;
13415 base = parts.base;
13416 index = parts.index;
13417 disp = parts.disp;
13418 scale = parts.scale;
13419 seg = parts.seg;
13421 /* Validate base register. */
13422 if (base)
13424 rtx reg = ix86_validate_address_register (base);
13426 if (reg == NULL_RTX)
13427 return false;
13429 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13430 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13431 /* Base is not valid. */
13432 return false;
13435 /* Validate index register. */
13436 if (index)
13438 rtx reg = ix86_validate_address_register (index);
13440 if (reg == NULL_RTX)
13441 return false;
13443 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13444 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13445 /* Index is not valid. */
13446 return false;
13449 /* Index and base should have the same mode. */
13450 if (base && index
13451 && GET_MODE (base) != GET_MODE (index))
13452 return false;
13454 /* Address override works only on the (%reg) part of %fs:(%reg). */
13455 if (seg != SEG_DEFAULT
13456 && ((base && GET_MODE (base) != word_mode)
13457 || (index && GET_MODE (index) != word_mode)))
13458 return false;
13460 /* Validate scale factor. */
13461 if (scale != 1)
13463 if (!index)
13464 /* Scale without index. */
13465 return false;
13467 if (scale != 2 && scale != 4 && scale != 8)
13468 /* Scale is not a valid multiplier. */
13469 return false;
13472 /* Validate displacement. */
13473 if (disp)
13475 if (GET_CODE (disp) == CONST
13476 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13477 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13478 switch (XINT (XEXP (disp, 0), 1))
13480 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13481 used. While the ABI also specifies 32bit relocations, we don't produce
13482 them at all and use IP-relative addressing instead. */
13483 case UNSPEC_GOT:
13484 case UNSPEC_GOTOFF:
13485 gcc_assert (flag_pic);
13486 if (!TARGET_64BIT)
13487 goto is_legitimate_pic;
13489 /* 64bit address unspec. */
13490 return false;
13492 case UNSPEC_GOTPCREL:
13493 case UNSPEC_PCREL:
13494 gcc_assert (flag_pic);
13495 goto is_legitimate_pic;
13497 case UNSPEC_GOTTPOFF:
13498 case UNSPEC_GOTNTPOFF:
13499 case UNSPEC_INDNTPOFF:
13500 case UNSPEC_NTPOFF:
13501 case UNSPEC_DTPOFF:
13502 break;
13504 case UNSPEC_STACK_CHECK:
13505 gcc_assert (flag_split_stack);
13506 break;
13508 default:
13509 /* Invalid address unspec. */
13510 return false;
13513 else if (SYMBOLIC_CONST (disp)
13514 && (flag_pic
13515 || (TARGET_MACHO
13516 #if TARGET_MACHO
13517 && MACHOPIC_INDIRECT
13518 && !machopic_operand_p (disp)
13519 #endif
13523 is_legitimate_pic:
13524 if (TARGET_64BIT && (index || base))
13526 /* foo@dtpoff(%rX) is ok. */
13527 if (GET_CODE (disp) != CONST
13528 || GET_CODE (XEXP (disp, 0)) != PLUS
13529 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13530 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13531 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13532 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13533 /* Non-constant pic memory reference. */
13534 return false;
13536 else if ((!TARGET_MACHO || flag_pic)
13537 && ! legitimate_pic_address_disp_p (disp))
13538 /* Displacement is an invalid pic construct. */
13539 return false;
13540 #if TARGET_MACHO
13541 else if (MACHO_DYNAMIC_NO_PIC_P
13542 && !ix86_legitimate_constant_p (Pmode, disp))
13543 /* displacement must be referenced via non_lazy_pointer */
13544 return false;
13545 #endif
13547 /* This code used to verify that a symbolic pic displacement
13548 includes the pic_offset_table_rtx register.
13550 While this is a good idea, unfortunately these constructs may
13551 be created by "adds using lea" optimization for incorrect
13552 code like:
13554 int a;
13555 int foo(int i)
13557 return *(&a+i);
13560 This code is nonsensical, but results in addressing the
13561 GOT table with a pic_offset_table_rtx base. We can't
13562 just refuse it easily, since it gets matched by the
13563 "addsi3" pattern, which later gets split to lea when the
13564 output register differs from the input. While this
13565 could be handled by a separate addsi pattern for this case
13566 that never results in lea, disabling this test seems to be
13567 the easier and correct fix for the crash. */
13569 else if (GET_CODE (disp) != LABEL_REF
13570 && !CONST_INT_P (disp)
13571 && (GET_CODE (disp) != CONST
13572 || !ix86_legitimate_constant_p (Pmode, disp))
13573 && (GET_CODE (disp) != SYMBOL_REF
13574 || !ix86_legitimate_constant_p (Pmode, disp)))
13575 /* Displacement is not constant. */
13576 return false;
13577 else if (TARGET_64BIT
13578 && !x86_64_immediate_operand (disp, VOIDmode))
13579 /* Displacement is out of range. */
13580 return false;
13581 /* In x32 mode, constant addresses are sign extended to 64bit, so
13582 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13583 else if (TARGET_X32 && !(index || base)
13584 && CONST_INT_P (disp)
13585 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13586 return false;
13589 /* Everything looks valid. */
13590 return true;
13593 /* Determine if a given RTX is a valid constant address. */
13595 bool
13596 constant_address_p (rtx x)
13598 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13601 /* Return a unique alias set for the GOT. */
13603 static alias_set_type
13604 ix86_GOT_alias_set (void)
13606 static alias_set_type set = -1;
13607 if (set == -1)
13608 set = new_alias_set ();
13609 return set;
13612 /* Return a legitimate reference for ORIG (an address) using the
13613 register REG. If REG is 0, a new pseudo is generated.
13615 There are two types of references that must be handled:
13617 1. Global data references must load the address from the GOT, via
13618 the PIC reg. An insn is emitted to do this load, and the reg is
13619 returned.
13621 2. Static data references, constant pool addresses, and code labels
13622 compute the address as an offset from the GOT, whose base is in
13623 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13624 differentiate them from global data objects. The returned
13625 address is the PIC reg + an unspec constant.
13627 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13628 reg also appears in the address. */
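/* Editorial illustration of the two cases for 32-bit ELF PIC (the
   assembly is an assumption about typical output, not a guarantee):

     1. global data:  movl sym@GOT(%ebx), %reg    # load address from GOT
     2. static data:  leal sym@GOTOFF(%ebx), %reg # PIC reg + constant offset

   where %ebx holds the GOT base, i.e. pic_offset_table_rtx.  */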
13630 static rtx
13631 legitimize_pic_address (rtx orig, rtx reg)
13633 rtx addr = orig;
13634 rtx new_rtx = orig;
13636 #if TARGET_MACHO
13637 if (TARGET_MACHO && !TARGET_64BIT)
13639 if (reg == 0)
13640 reg = gen_reg_rtx (Pmode);
13641 /* Use the generic Mach-O PIC machinery. */
13642 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13644 #endif
13646 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13648 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13649 if (tmp)
13650 return tmp;
13653 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13654 new_rtx = addr;
13655 else if (TARGET_64BIT && !TARGET_PECOFF
13656 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13658 rtx tmpreg;
13659 /* This symbol may be referenced via a displacement from the PIC
13660 base address (@GOTOFF). */
13662 if (GET_CODE (addr) == CONST)
13663 addr = XEXP (addr, 0);
13664 if (GET_CODE (addr) == PLUS)
13666 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13667 UNSPEC_GOTOFF);
13668 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13670 else
13671 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13672 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13673 if (!reg)
13674 tmpreg = gen_reg_rtx (Pmode);
13675 else
13676 tmpreg = reg;
13677 emit_move_insn (tmpreg, new_rtx);
13679 if (reg != 0)
13681 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13682 tmpreg, 1, OPTAB_DIRECT);
13683 new_rtx = reg;
13685 else
13686 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13688 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13690 /* This symbol may be referenced via a displacement from the PIC
13691 base address (@GOTOFF). */
13693 if (GET_CODE (addr) == CONST)
13694 addr = XEXP (addr, 0);
13695 if (GET_CODE (addr) == PLUS)
13697 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13698 UNSPEC_GOTOFF);
13699 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13701 else
13702 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13703 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13704 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13706 if (reg != 0)
13708 emit_move_insn (reg, new_rtx);
13709 new_rtx = reg;
13712 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13713 /* We can't use @GOTOFF for text labels on VxWorks;
13714 see gotoff_operand. */
13715 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13717 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13718 if (tmp)
13719 return tmp;
13721 /* For x64 PE-COFF there is no GOT table, so we use the address
13722 directly. */
13723 if (TARGET_64BIT && TARGET_PECOFF)
13725 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13726 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13728 if (reg == 0)
13729 reg = gen_reg_rtx (Pmode);
13730 emit_move_insn (reg, new_rtx);
13731 new_rtx = reg;
13733 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13735 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13736 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13737 new_rtx = gen_const_mem (Pmode, new_rtx);
13738 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13740 if (reg == 0)
13741 reg = gen_reg_rtx (Pmode);
13742 /* Use gen_movsi directly, otherwise the address is loaded
13743 into a register for CSE. We don't want to CSE these addresses;
13744 instead we CSE addresses from the GOT table, so skip this. */
13745 emit_insn (gen_movsi (reg, new_rtx));
13746 new_rtx = reg;
13748 else
13750 /* This symbol must be referenced via a load from the
13751 Global Offset Table (@GOT). */
13753 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13754 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13755 if (TARGET_64BIT)
13756 new_rtx = force_reg (Pmode, new_rtx);
13757 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13758 new_rtx = gen_const_mem (Pmode, new_rtx);
13759 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13761 if (reg == 0)
13762 reg = gen_reg_rtx (Pmode);
13763 emit_move_insn (reg, new_rtx);
13764 new_rtx = reg;
13767 else
13769 if (CONST_INT_P (addr)
13770 && !x86_64_immediate_operand (addr, VOIDmode))
13772 if (reg)
13774 emit_move_insn (reg, addr);
13775 new_rtx = reg;
13777 else
13778 new_rtx = force_reg (Pmode, addr);
13780 else if (GET_CODE (addr) == CONST)
13782 addr = XEXP (addr, 0);
13784 /* We must match stuff we generate before. Assume the only
13785 unspecs that can get here are ours. Not that we could do
13786 anything with them anyway.... */
13787 if (GET_CODE (addr) == UNSPEC
13788 || (GET_CODE (addr) == PLUS
13789 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13790 return orig;
13791 gcc_assert (GET_CODE (addr) == PLUS);
13793 if (GET_CODE (addr) == PLUS)
13795 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13797 /* Check first to see if this is a constant offset from a @GOTOFF
13798 symbol reference. */
13799 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13800 && CONST_INT_P (op1))
13802 if (!TARGET_64BIT)
13804 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13805 UNSPEC_GOTOFF);
13806 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13807 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13808 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13810 if (reg != 0)
13812 emit_move_insn (reg, new_rtx);
13813 new_rtx = reg;
13816 else
13818 if (INTVAL (op1) < -16*1024*1024
13819 || INTVAL (op1) >= 16*1024*1024)
13821 if (!x86_64_immediate_operand (op1, Pmode))
13822 op1 = force_reg (Pmode, op1);
13823 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13827 else
13829 rtx base = legitimize_pic_address (op0, reg);
13830 machine_mode mode = GET_MODE (base);
13831 new_rtx
13832 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13834 if (CONST_INT_P (new_rtx))
13836 if (INTVAL (new_rtx) < -16*1024*1024
13837 || INTVAL (new_rtx) >= 16*1024*1024)
13839 if (!x86_64_immediate_operand (new_rtx, mode))
13840 new_rtx = force_reg (mode, new_rtx);
13841 new_rtx
13842 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13844 else
13845 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13847 else
13849 /* For %rip addressing, we have to use just disp32, with
13850 neither base nor index. */
13851 if (TARGET_64BIT
13852 && (GET_CODE (base) == SYMBOL_REF
13853 || GET_CODE (base) == LABEL_REF))
13854 base = force_reg (mode, base);
13855 if (GET_CODE (new_rtx) == PLUS
13856 && CONSTANT_P (XEXP (new_rtx, 1)))
13858 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13859 new_rtx = XEXP (new_rtx, 1);
13861 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13866 return new_rtx;
13869 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13871 static rtx
13872 get_thread_pointer (machine_mode tp_mode, bool to_reg)
13874 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13876 if (GET_MODE (tp) != tp_mode)
13878 gcc_assert (GET_MODE (tp) == SImode);
13879 gcc_assert (tp_mode == DImode);
13881 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13884 if (to_reg)
13885 tp = copy_to_mode_reg (tp_mode, tp);
13887 return tp;
13890 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13892 static GTY(()) rtx ix86_tls_symbol;
13894 static rtx
13895 ix86_tls_get_addr (void)
13897 if (!ix86_tls_symbol)
13899 const char *sym
13900 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13901 ? "___tls_get_addr" : "__tls_get_addr");
13903 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13906 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13908 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13909 UNSPEC_PLTOFF);
13910 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13911 gen_rtx_CONST (Pmode, unspec));
13914 return ix86_tls_symbol;
13917 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13919 static GTY(()) rtx ix86_tls_module_base_symbol;
13922 ix86_tls_module_base (void)
13924 if (!ix86_tls_module_base_symbol)
13926 ix86_tls_module_base_symbol
13927 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13929 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13930 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13933 return ix86_tls_module_base_symbol;
13936 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13937 false if we expect this to be used for a memory address and true if
13938 we expect to load the address into a register. */
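/* Editorial illustration: for the initial-exec model on x86-64 this
   typically ends up as something like (assumed, the exact form depends
   on the flags handled below)

       movq  x@gottpoff(%rip), %reg
       movq  %fs:(%reg), ...

   while the local-exec model can address the variable directly as
   %fs:x@tpoff.  */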
13940 static rtx
13941 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13943 rtx dest, base, off;
13944 rtx pic = NULL_RTX, tp = NULL_RTX;
13945 machine_mode tp_mode = Pmode;
13946 int type;
13948 /* Fall back to global dynamic model if tool chain cannot support local
13949 dynamic. */
13950 if (TARGET_SUN_TLS && !TARGET_64BIT
13951 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13952 && model == TLS_MODEL_LOCAL_DYNAMIC)
13953 model = TLS_MODEL_GLOBAL_DYNAMIC;
13955 switch (model)
13957 case TLS_MODEL_GLOBAL_DYNAMIC:
13958 dest = gen_reg_rtx (Pmode);
13960 if (!TARGET_64BIT)
13962 if (flag_pic && !TARGET_PECOFF)
13963 pic = pic_offset_table_rtx;
13964 else
13966 pic = gen_reg_rtx (Pmode);
13967 emit_insn (gen_set_got (pic));
13971 if (TARGET_GNU2_TLS)
13973 if (TARGET_64BIT)
13974 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13975 else
13976 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13978 tp = get_thread_pointer (Pmode, true);
13979 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13981 if (GET_MODE (x) != Pmode)
13982 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13984 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13986 else
13988 rtx caddr = ix86_tls_get_addr ();
13990 if (TARGET_64BIT)
13992 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13993 rtx_insn *insns;
13995 start_sequence ();
13996 emit_call_insn
13997 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13998 insns = get_insns ();
13999 end_sequence ();
14001 if (GET_MODE (x) != Pmode)
14002 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14004 RTL_CONST_CALL_P (insns) = 1;
14005 emit_libcall_block (insns, dest, rax, x);
14007 else
14008 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
14010 break;
14012 case TLS_MODEL_LOCAL_DYNAMIC:
14013 base = gen_reg_rtx (Pmode);
14015 if (!TARGET_64BIT)
14017 if (flag_pic)
14018 pic = pic_offset_table_rtx;
14019 else
14021 pic = gen_reg_rtx (Pmode);
14022 emit_insn (gen_set_got (pic));
14026 if (TARGET_GNU2_TLS)
14028 rtx tmp = ix86_tls_module_base ();
14030 if (TARGET_64BIT)
14031 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
14032 else
14033 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
14035 tp = get_thread_pointer (Pmode, true);
14036 set_unique_reg_note (get_last_insn (), REG_EQUAL,
14037 gen_rtx_MINUS (Pmode, tmp, tp));
14039 else
14041 rtx caddr = ix86_tls_get_addr ();
14043 if (TARGET_64BIT)
14045 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14046 rtx_insn *insns;
14047 rtx eqv;
14049 start_sequence ();
14050 emit_call_insn
14051 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
14052 insns = get_insns ();
14053 end_sequence ();
14055 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
14056 share the LD_BASE result with other LD model accesses. */
14057 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
14058 UNSPEC_TLS_LD_BASE);
14060 RTL_CONST_CALL_P (insns) = 1;
14061 emit_libcall_block (insns, base, rax, eqv);
14063 else
14064 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
14067 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
14068 off = gen_rtx_CONST (Pmode, off);
14070 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
14072 if (TARGET_GNU2_TLS)
14074 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
14076 if (GET_MODE (x) != Pmode)
14077 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14079 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14081 break;
14083 case TLS_MODEL_INITIAL_EXEC:
14084 if (TARGET_64BIT)
14086 if (TARGET_SUN_TLS && !TARGET_X32)
14088 /* The Sun linker took the AMD64 TLS spec literally
14089 and can only handle %rax as destination of the
14090 initial-exec code sequence. */
14092 dest = gen_reg_rtx (DImode);
14093 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14094 return dest;
14097 /* Generate DImode references to avoid %fs:(%reg32)
14098 problems and a linker IE->LE relaxation bug. */
14099 tp_mode = DImode;
14100 pic = NULL;
14101 type = UNSPEC_GOTNTPOFF;
14103 else if (flag_pic)
14105 pic = pic_offset_table_rtx;
14106 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14108 else if (!TARGET_ANY_GNU_TLS)
14110 pic = gen_reg_rtx (Pmode);
14111 emit_insn (gen_set_got (pic));
14112 type = UNSPEC_GOTTPOFF;
14114 else
14116 pic = NULL;
14117 type = UNSPEC_INDNTPOFF;
14120 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14121 off = gen_rtx_CONST (tp_mode, off);
14122 if (pic)
14123 off = gen_rtx_PLUS (tp_mode, pic, off);
14124 off = gen_const_mem (tp_mode, off);
14125 set_mem_alias_set (off, ix86_GOT_alias_set ());
14127 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14129 base = get_thread_pointer (tp_mode,
14130 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14131 off = force_reg (tp_mode, off);
14132 return gen_rtx_PLUS (tp_mode, base, off);
14134 else
14136 base = get_thread_pointer (Pmode, true);
14137 dest = gen_reg_rtx (Pmode);
14138 emit_insn (ix86_gen_sub3 (dest, base, off));
14140 break;
14142 case TLS_MODEL_LOCAL_EXEC:
14143 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14144 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14145 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14146 off = gen_rtx_CONST (Pmode, off);
14148 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14150 base = get_thread_pointer (Pmode,
14151 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14152 return gen_rtx_PLUS (Pmode, base, off);
14154 else
14156 base = get_thread_pointer (Pmode, true);
14157 dest = gen_reg_rtx (Pmode);
14158 emit_insn (ix86_gen_sub3 (dest, base, off));
14160 break;
14162 default:
14163 gcc_unreachable ();
14166 return dest;
14169 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14170 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14171 unique refptr-DECL symbol corresponding to symbol DECL. */
14173 struct dllimport_hasher : ggc_cache_hasher<tree_map *>
14175 static inline hashval_t hash (tree_map *m) { return m->hash; }
14176 static inline bool
14177 equal (tree_map *a, tree_map *b)
14179 return a->base.from == b->base.from;
14182 static void
14183 handle_cache_entry (tree_map *&m)
14185 extern void gt_ggc_mx (tree_map *&);
14186 if (m == HTAB_EMPTY_ENTRY || m == HTAB_DELETED_ENTRY)
14187 return;
14188 else if (ggc_marked_p (m->base.from))
14189 gt_ggc_mx (m);
14190 else
14191 m = static_cast<tree_map *> (HTAB_DELETED_ENTRY);
14195 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14197 static tree
14198 get_dllimport_decl (tree decl, bool beimport)
14200 struct tree_map *h, in;
14201 const char *name;
14202 const char *prefix;
14203 size_t namelen, prefixlen;
14204 char *imp_name;
14205 tree to;
14206 rtx rtl;
14208 if (!dllimport_map)
14209 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14211 in.hash = htab_hash_pointer (decl);
14212 in.base.from = decl;
14213 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14214 h = *loc;
14215 if (h)
14216 return h->to;
14218 *loc = h = ggc_alloc<tree_map> ();
14219 h->hash = in.hash;
14220 h->base.from = decl;
14221 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14222 VAR_DECL, NULL, ptr_type_node);
14223 DECL_ARTIFICIAL (to) = 1;
14224 DECL_IGNORED_P (to) = 1;
14225 DECL_EXTERNAL (to) = 1;
14226 TREE_READONLY (to) = 1;
14228 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14229 name = targetm.strip_name_encoding (name);
14230 if (beimport)
14231 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14232 ? "*__imp_" : "*__imp__";
14233 else
14234 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14235 namelen = strlen (name);
14236 prefixlen = strlen (prefix);
14237 imp_name = (char *) alloca (namelen + prefixlen + 1);
14238 memcpy (imp_name, prefix, prefixlen);
14239 memcpy (imp_name + prefixlen, name, namelen + 1);
14241 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14242 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14243 SET_SYMBOL_REF_DECL (rtl, to);
14244 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14245 if (!beimport)
14247 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14248 #ifdef SUB_TARGET_RECORD_STUB
14249 SUB_TARGET_RECORD_STUB (name);
14250 #endif
14253 rtl = gen_const_mem (Pmode, rtl);
14254 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14256 SET_DECL_RTL (to, rtl);
14257 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14259 return to;
14262 /* Expand SYMBOL into its corresponding far-addressed symbol.
14263 WANT_REG is true if we require the result be a register. */
14265 static rtx
14266 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14268 tree imp_decl;
14269 rtx x;
14271 gcc_assert (SYMBOL_REF_DECL (symbol));
14272 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14274 x = DECL_RTL (imp_decl);
14275 if (want_reg)
14276 x = force_reg (Pmode, x);
14277 return x;
14280 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14281 true if we require the result be a register. */
14283 static rtx
14284 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14286 tree imp_decl;
14287 rtx x;
14289 gcc_assert (SYMBOL_REF_DECL (symbol));
14290 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14292 x = DECL_RTL (imp_decl);
14293 if (want_reg)
14294 x = force_reg (Pmode, x);
14295 return x;
14298 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
14299 is true if we require the result be a register. */
14301 static rtx
14302 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14304 if (!TARGET_PECOFF)
14305 return NULL_RTX;
14307 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14309 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14310 return legitimize_dllimport_symbol (addr, inreg);
14311 if (GET_CODE (addr) == CONST
14312 && GET_CODE (XEXP (addr, 0)) == PLUS
14313 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14314 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14316 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14317 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14321 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14322 return NULL_RTX;
14323 if (GET_CODE (addr) == SYMBOL_REF
14324 && !is_imported_p (addr)
14325 && SYMBOL_REF_EXTERNAL_P (addr)
14326 && SYMBOL_REF_DECL (addr))
14327 return legitimize_pe_coff_extern_decl (addr, inreg);
14329 if (GET_CODE (addr) == CONST
14330 && GET_CODE (XEXP (addr, 0)) == PLUS
14331 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14332 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14333 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14334 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14336 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14337 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14339 return NULL_RTX;
14342 /* Try machine-dependent ways of modifying an illegitimate address
14343 to be legitimate. If we find one, return the new, valid address.
14344 This macro is used in only one place: `memory_address' in explow.c.
14346 OLDX is the address as it was before break_out_memory_refs was called.
14347 In some cases it is useful to look at this to decide what needs to be done.
14349 It is always safe for this macro to do nothing. It exists to recognize
14350 opportunities to optimize the output.
14352 For the 80386, we handle X+REG by loading X into a register R and
14353 using R+REG. R will go in a general reg and indexing will be used.
14354 However, if REG is a broken-out memory address or multiplication,
14355 nothing needs to be done because REG can certainly go in a general reg.
14357 When -fpic is used, special handling is needed for symbolic references.
14358 See comments by legitimize_pic_address in i386.c for details. */
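/* Added illustration (not from the original sources): one of the rewrites
   performed below turns a scaled-index address written with a shift, e.g.
   (plus (ashift (reg) (const_int 2)) (reg)), into the canonical multiply
   form (plus (mult (reg) (const_int 4)) (reg)), which matches the
   base + index*scale addressing the rest of the backend expects.  */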
14360 static rtx
14361 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14363 bool changed = false;
14364 unsigned log;
14366 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14367 if (log)
14368 return legitimize_tls_address (x, (enum tls_model) log, false);
14369 if (GET_CODE (x) == CONST
14370 && GET_CODE (XEXP (x, 0)) == PLUS
14371 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14372 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14374 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14375 (enum tls_model) log, false);
14376 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14379 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14381 rtx tmp = legitimize_pe_coff_symbol (x, true);
14382 if (tmp)
14383 return tmp;
14386 if (flag_pic && SYMBOLIC_CONST (x))
14387 return legitimize_pic_address (x, 0);
14389 #if TARGET_MACHO
14390 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14391 return machopic_indirect_data_reference (x, 0);
14392 #endif
14394 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
14395 if (GET_CODE (x) == ASHIFT
14396 && CONST_INT_P (XEXP (x, 1))
14397 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14399 changed = true;
14400 log = INTVAL (XEXP (x, 1));
14401 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14402 GEN_INT (1 << log));
14405 if (GET_CODE (x) == PLUS)
14407 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14409 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14410 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14411 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14413 changed = true;
14414 log = INTVAL (XEXP (XEXP (x, 0), 1));
14415 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14416 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14417 GEN_INT (1 << log));
14420 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14421 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14422 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14424 changed = true;
14425 log = INTVAL (XEXP (XEXP (x, 1), 1));
14426 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14427 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14428 GEN_INT (1 << log));
14431 /* Put multiply first if it isn't already. */
14432 if (GET_CODE (XEXP (x, 1)) == MULT)
14434 std::swap (XEXP (x, 0), XEXP (x, 1));
14435 changed = true;
14438 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14439 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14440 created by virtual register instantiation, register elimination, and
14441 similar optimizations. */
14442 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14444 changed = true;
14445 x = gen_rtx_PLUS (Pmode,
14446 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14447 XEXP (XEXP (x, 1), 0)),
14448 XEXP (XEXP (x, 1), 1));
14451 /* Canonicalize
14452 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14453 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14454 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14455 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14456 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14457 && CONSTANT_P (XEXP (x, 1)))
14459 rtx constant;
14460 rtx other = NULL_RTX;
14462 if (CONST_INT_P (XEXP (x, 1)))
14464 constant = XEXP (x, 1);
14465 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14467 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14469 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14470 other = XEXP (x, 1);
14472 else
14473 constant = 0;
14475 if (constant)
14477 changed = true;
14478 x = gen_rtx_PLUS (Pmode,
14479 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14480 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14481 plus_constant (Pmode, other,
14482 INTVAL (constant)));
14486 if (changed && ix86_legitimate_address_p (mode, x, false))
14487 return x;
14489 if (GET_CODE (XEXP (x, 0)) == MULT)
14491 changed = true;
14492 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14495 if (GET_CODE (XEXP (x, 1)) == MULT)
14497 changed = true;
14498 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14501 if (changed
14502 && REG_P (XEXP (x, 1))
14503 && REG_P (XEXP (x, 0)))
14504 return x;
14506 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14508 changed = true;
14509 x = legitimize_pic_address (x, 0);
14512 if (changed && ix86_legitimate_address_p (mode, x, false))
14513 return x;
14515 if (REG_P (XEXP (x, 0)))
14517 rtx temp = gen_reg_rtx (Pmode);
14518 rtx val = force_operand (XEXP (x, 1), temp);
14519 if (val != temp)
14521 val = convert_to_mode (Pmode, val, 1);
14522 emit_move_insn (temp, val);
14525 XEXP (x, 1) = temp;
14526 return x;
14529 else if (REG_P (XEXP (x, 1)))
14531 rtx temp = gen_reg_rtx (Pmode);
14532 rtx val = force_operand (XEXP (x, 0), temp);
14533 if (val != temp)
14535 val = convert_to_mode (Pmode, val, 1);
14536 emit_move_insn (temp, val);
14539 XEXP (x, 0) = temp;
14540 return x;
14544 return x;
14547 /* Print an integer constant expression in assembler syntax. Addition
14548 and subtraction are the only arithmetic that may appear in these
14549 expressions. FILE is the stdio stream to write to, X is the rtx, and
14550 CODE is the operand print code from the output string. */
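/* Added note: as a concrete example, a non-local SYMBOL_REF printed with
   operand code 'P' gets an "@PLT" suffix appended (see the SYMBOL_REF case
   below), while UNSPEC wrappers such as UNSPEC_GOTOFF print the matching
   "@GOTOFF" relocation suffix.  */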
14552 static void
14553 output_pic_addr_const (FILE *file, rtx x, int code)
14555 char buf[256];
14557 switch (GET_CODE (x))
14559 case PC:
14560 gcc_assert (flag_pic);
14561 putc ('.', file);
14562 break;
14564 case SYMBOL_REF:
14565 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14566 output_addr_const (file, x);
14567 else
14569 const char *name = XSTR (x, 0);
14571 /* Mark the decl as referenced so that cgraph will
14572 output the function. */
14573 if (SYMBOL_REF_DECL (x))
14574 mark_decl_referenced (SYMBOL_REF_DECL (x));
14576 #if TARGET_MACHO
14577 if (MACHOPIC_INDIRECT
14578 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14579 name = machopic_indirection_name (x, /*stub_p=*/true);
14580 #endif
14581 assemble_name (file, name);
14583 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14584 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14585 fputs ("@PLT", file);
14586 break;
14588 case LABEL_REF:
14589 x = XEXP (x, 0);
14590 /* FALLTHRU */
14591 case CODE_LABEL:
14592 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14593 assemble_name (asm_out_file, buf);
14594 break;
14596 case CONST_INT:
14597 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14598 break;
14600 case CONST:
14601 /* This used to output parentheses around the expression,
14602 but that does not work on the 386 (either ATT or BSD assembler). */
14603 output_pic_addr_const (file, XEXP (x, 0), code);
14604 break;
14606 case CONST_DOUBLE:
14607 /* We can't handle floating point constants;
14608 TARGET_PRINT_OPERAND must handle them. */
14609 output_operand_lossage ("floating constant misused");
14610 break;
14612 case PLUS:
14613 /* Some assemblers need integer constants to appear first. */
14614 if (CONST_INT_P (XEXP (x, 0)))
14616 output_pic_addr_const (file, XEXP (x, 0), code);
14617 putc ('+', file);
14618 output_pic_addr_const (file, XEXP (x, 1), code);
14620 else
14622 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14623 output_pic_addr_const (file, XEXP (x, 1), code);
14624 putc ('+', file);
14625 output_pic_addr_const (file, XEXP (x, 0), code);
14627 break;
14629 case MINUS:
14630 if (!TARGET_MACHO)
14631 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14632 output_pic_addr_const (file, XEXP (x, 0), code);
14633 putc ('-', file);
14634 output_pic_addr_const (file, XEXP (x, 1), code);
14635 if (!TARGET_MACHO)
14636 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14637 break;
14639 case UNSPEC:
14640 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14642 bool f = i386_asm_output_addr_const_extra (file, x);
14643 gcc_assert (f);
14644 break;
14647 gcc_assert (XVECLEN (x, 0) == 1);
14648 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14649 switch (XINT (x, 1))
14651 case UNSPEC_GOT:
14652 fputs ("@GOT", file);
14653 break;
14654 case UNSPEC_GOTOFF:
14655 fputs ("@GOTOFF", file);
14656 break;
14657 case UNSPEC_PLTOFF:
14658 fputs ("@PLTOFF", file);
14659 break;
14660 case UNSPEC_PCREL:
14661 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14662 "(%rip)" : "[rip]", file);
14663 break;
14664 case UNSPEC_GOTPCREL:
14665 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14666 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14667 break;
14668 case UNSPEC_GOTTPOFF:
14669 /* FIXME: This might be @TPOFF in Sun ld too. */
14670 fputs ("@gottpoff", file);
14671 break;
14672 case UNSPEC_TPOFF:
14673 fputs ("@tpoff", file);
14674 break;
14675 case UNSPEC_NTPOFF:
14676 if (TARGET_64BIT)
14677 fputs ("@tpoff", file);
14678 else
14679 fputs ("@ntpoff", file);
14680 break;
14681 case UNSPEC_DTPOFF:
14682 fputs ("@dtpoff", file);
14683 break;
14684 case UNSPEC_GOTNTPOFF:
14685 if (TARGET_64BIT)
14686 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14687 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14688 else
14689 fputs ("@gotntpoff", file);
14690 break;
14691 case UNSPEC_INDNTPOFF:
14692 fputs ("@indntpoff", file);
14693 break;
14694 #if TARGET_MACHO
14695 case UNSPEC_MACHOPIC_OFFSET:
14696 putc ('-', file);
14697 machopic_output_function_base_name (file);
14698 break;
14699 #endif
14700 default:
14701 output_operand_lossage ("invalid UNSPEC as operand");
14702 break;
14704 break;
14706 default:
14707 output_operand_lossage ("invalid expression as operand");
14711 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14712 We need to emit DTP-relative relocations. */
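/* Added illustration: for a symbol `foo' this emits something like
   ".long foo@dtpoff" for SIZE == 4 and ".long foo@dtpoff, 0" for SIZE == 8
   (assuming ASM_LONG is the usual ".long" directive on this target).  */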
14714 static void ATTRIBUTE_UNUSED
14715 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14717 fputs (ASM_LONG, file);
14718 output_addr_const (file, x);
14719 fputs ("@dtpoff", file);
14720 switch (size)
14722 case 4:
14723 break;
14724 case 8:
14725 fputs (", 0", file);
14726 break;
14727 default:
14728 gcc_unreachable ();
14732 /* Return true if X is a representation of the PIC register. This copes
14733 with calls from ix86_find_base_term, where the register might have
14734 been replaced by a cselib value. */
14736 static bool
14737 ix86_pic_register_p (rtx x)
14739 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14740 return (pic_offset_table_rtx
14741 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14742 else if (!REG_P (x))
14743 return false;
14744 else if (pic_offset_table_rtx)
14746 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14747 return true;
14748 if (HARD_REGISTER_P (x)
14749 && !HARD_REGISTER_P (pic_offset_table_rtx)
14750 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14751 return true;
14752 return false;
14754 else
14755 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14758 /* Helper function for ix86_delegitimize_address.
14759 Attempt to delegitimize TLS local-exec accesses. */
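/* Added illustration: a local-exec access through the TLS segment register,
   i.e. an address whose displacement wraps the symbol in an UNSPEC_NTPOFF
   (the %fs:foo@tpoff / %gs:foo@ntpoff style of access), is turned back into
   a plain reference to the symbol, preserving any base register, index and
   constant offset that were folded into the address.  */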
14761 static rtx
14762 ix86_delegitimize_tls_address (rtx orig_x)
14764 rtx x = orig_x, unspec;
14765 struct ix86_address addr;
14767 if (!TARGET_TLS_DIRECT_SEG_REFS)
14768 return orig_x;
14769 if (MEM_P (x))
14770 x = XEXP (x, 0);
14771 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14772 return orig_x;
14773 if (ix86_decompose_address (x, &addr) == 0
14774 || addr.seg != DEFAULT_TLS_SEG_REG
14775 || addr.disp == NULL_RTX
14776 || GET_CODE (addr.disp) != CONST)
14777 return orig_x;
14778 unspec = XEXP (addr.disp, 0);
14779 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14780 unspec = XEXP (unspec, 0);
14781 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14782 return orig_x;
14783 x = XVECEXP (unspec, 0, 0);
14784 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14785 if (unspec != XEXP (addr.disp, 0))
14786 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14787 if (addr.index)
14789 rtx idx = addr.index;
14790 if (addr.scale != 1)
14791 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14792 x = gen_rtx_PLUS (Pmode, idx, x);
14794 if (addr.base)
14795 x = gen_rtx_PLUS (Pmode, addr.base, x);
14796 if (MEM_P (orig_x))
14797 x = replace_equiv_address_nv (orig_x, x);
14798 return x;
14801 /* In the name of slightly smaller debug output, and to cater to
14802 general assembler lossage, recognize PIC+GOTOFF and turn it back
14803 into a direct symbol reference.
14805 On Darwin, this is necessary to avoid a crash, because Darwin
14806 has a different PIC label for each routine but the DWARF debugging
14807 information is not associated with any particular routine, so it's
14808 necessary to remove references to the PIC label from RTL stored by
14809 the DWARF output code. */
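/* Added illustration: on 32-bit PIC, a (non-memory) reference of the form
   (plus (reg ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   is collapsed back to the plain (symbol_ref "foo"); the 64-bit paths below
   likewise strip UNSPEC_GOTPCREL / UNSPEC_PCREL wrappers.  */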
14811 static rtx
14812 ix86_delegitimize_address (rtx x)
14814 rtx orig_x = delegitimize_mem_from_attrs (x);
14815 /* addend is NULL or some rtx if x is something+GOTOFF where
14816 something doesn't include the PIC register. */
14817 rtx addend = NULL_RTX;
14818 /* reg_addend is NULL or a multiple of some register. */
14819 rtx reg_addend = NULL_RTX;
14820 /* const_addend is NULL or a const_int. */
14821 rtx const_addend = NULL_RTX;
14822 /* This is the result, or NULL. */
14823 rtx result = NULL_RTX;
14825 x = orig_x;
14827 if (MEM_P (x))
14828 x = XEXP (x, 0);
14830 if (TARGET_64BIT)
14832 if (GET_CODE (x) == CONST
14833 && GET_CODE (XEXP (x, 0)) == PLUS
14834 && GET_MODE (XEXP (x, 0)) == Pmode
14835 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14836 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14837 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14839 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14840 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14841 if (MEM_P (orig_x))
14842 x = replace_equiv_address_nv (orig_x, x);
14843 return x;
14846 if (GET_CODE (x) == CONST
14847 && GET_CODE (XEXP (x, 0)) == UNSPEC
14848 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14849 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14850 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14852 x = XVECEXP (XEXP (x, 0), 0, 0);
14853 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14855 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14856 GET_MODE (x), 0);
14857 if (x == NULL_RTX)
14858 return orig_x;
14860 return x;
14863 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14864 return ix86_delegitimize_tls_address (orig_x);
14866 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14867 and -mcmodel=medium -fpic. */
14870 if (GET_CODE (x) != PLUS
14871 || GET_CODE (XEXP (x, 1)) != CONST)
14872 return ix86_delegitimize_tls_address (orig_x);
14874 if (ix86_pic_register_p (XEXP (x, 0)))
14875 /* %ebx + GOT/GOTOFF */
14877 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14879 /* %ebx + %reg * scale + GOT/GOTOFF */
14880 reg_addend = XEXP (x, 0);
14881 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14882 reg_addend = XEXP (reg_addend, 1);
14883 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14884 reg_addend = XEXP (reg_addend, 0);
14885 else
14887 reg_addend = NULL_RTX;
14888 addend = XEXP (x, 0);
14891 else
14892 addend = XEXP (x, 0);
14894 x = XEXP (XEXP (x, 1), 0);
14895 if (GET_CODE (x) == PLUS
14896 && CONST_INT_P (XEXP (x, 1)))
14898 const_addend = XEXP (x, 1);
14899 x = XEXP (x, 0);
14902 if (GET_CODE (x) == UNSPEC
14903 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14904 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14905 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14906 && !MEM_P (orig_x) && !addend)))
14907 result = XVECEXP (x, 0, 0);
14909 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14910 && !MEM_P (orig_x))
14911 result = XVECEXP (x, 0, 0);
14913 if (! result)
14914 return ix86_delegitimize_tls_address (orig_x);
14916 if (const_addend)
14917 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14918 if (reg_addend)
14919 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14920 if (addend)
14922 /* If the rest of the original X doesn't involve the PIC register, add
14923 addend and subtract pic_offset_table_rtx. This can happen e.g.
14924 for code like:
14925 leal (%ebx, %ecx, 4), %ecx
14927 movl foo@GOTOFF(%ecx), %edx
14928 in which case we return (%ecx - %ebx) + foo
14929 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14930 and reload has completed. */
14931 if (pic_offset_table_rtx
14932 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14933 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14934 pic_offset_table_rtx),
14935 result);
14936 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14938 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14939 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14940 result = gen_rtx_PLUS (Pmode, tmp, result);
14942 else
14943 return orig_x;
14945 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14947 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14948 if (result == NULL_RTX)
14949 return orig_x;
14951 return result;
14954 /* If X is a machine specific address (i.e. a symbol or label being
14955 referenced as a displacement from the GOT implemented using an
14956 UNSPEC), then return the base term. Otherwise return X. */
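/* Added illustration: in 64-bit mode a GOT-relative reference such as
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTPCREL)) yields the base
   term (symbol_ref "foo"); anything else falls through to
   ix86_delegitimize_address.  */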
14959 ix86_find_base_term (rtx x)
14961 rtx term;
14963 if (TARGET_64BIT)
14965 if (GET_CODE (x) != CONST)
14966 return x;
14967 term = XEXP (x, 0);
14968 if (GET_CODE (term) == PLUS
14969 && CONST_INT_P (XEXP (term, 1)))
14970 term = XEXP (term, 0);
14971 if (GET_CODE (term) != UNSPEC
14972 || (XINT (term, 1) != UNSPEC_GOTPCREL
14973 && XINT (term, 1) != UNSPEC_PCREL))
14974 return x;
14976 return XVECEXP (term, 0, 0);
14979 return ix86_delegitimize_address (x);
14982 static void
14983 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
14984 bool fp, FILE *file)
14986 const char *suffix;
14988 if (mode == CCFPmode || mode == CCFPUmode)
14990 code = ix86_fp_compare_code_to_integer (code);
14991 mode = CCmode;
14993 if (reverse)
14994 code = reverse_condition (code);
14996 switch (code)
14998 case EQ:
14999 switch (mode)
15001 case CCAmode:
15002 suffix = "a";
15003 break;
15005 case CCCmode:
15006 suffix = "c";
15007 break;
15009 case CCOmode:
15010 suffix = "o";
15011 break;
15013 case CCSmode:
15014 suffix = "s";
15015 break;
15017 default:
15018 suffix = "e";
15020 break;
15021 case NE:
15022 switch (mode)
15024 case CCAmode:
15025 suffix = "na";
15026 break;
15028 case CCCmode:
15029 suffix = "nc";
15030 break;
15032 case CCOmode:
15033 suffix = "no";
15034 break;
15036 case CCSmode:
15037 suffix = "ns";
15038 break;
15040 default:
15041 suffix = "ne";
15043 break;
15044 case GT:
15045 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
15046 suffix = "g";
15047 break;
15048 case GTU:
15049 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
15050 Those same assemblers have the same but opposite lossage on cmov. */
15051 if (mode == CCmode)
15052 suffix = fp ? "nbe" : "a";
15053 else
15054 gcc_unreachable ();
15055 break;
15056 case LT:
15057 switch (mode)
15059 case CCNOmode:
15060 case CCGOCmode:
15061 suffix = "s";
15062 break;
15064 case CCmode:
15065 case CCGCmode:
15066 suffix = "l";
15067 break;
15069 default:
15070 gcc_unreachable ();
15072 break;
15073 case LTU:
15074 if (mode == CCmode)
15075 suffix = "b";
15076 else if (mode == CCCmode)
15077 suffix = fp ? "b" : "c";
15078 else
15079 gcc_unreachable ();
15080 break;
15081 case GE:
15082 switch (mode)
15084 case CCNOmode:
15085 case CCGOCmode:
15086 suffix = "ns";
15087 break;
15089 case CCmode:
15090 case CCGCmode:
15091 suffix = "ge";
15092 break;
15094 default:
15095 gcc_unreachable ();
15097 break;
15098 case GEU:
15099 if (mode == CCmode)
15100 suffix = "nb";
15101 else if (mode == CCCmode)
15102 suffix = fp ? "nb" : "nc";
15103 else
15104 gcc_unreachable ();
15105 break;
15106 case LE:
15107 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15108 suffix = "le";
15109 break;
15110 case LEU:
15111 if (mode == CCmode)
15112 suffix = "be";
15113 else
15114 gcc_unreachable ();
15115 break;
15116 case UNORDERED:
15117 suffix = fp ? "u" : "p";
15118 break;
15119 case ORDERED:
15120 suffix = fp ? "nu" : "np";
15121 break;
15122 default:
15123 gcc_unreachable ();
15125 fputs (suffix, file);
15128 /* Print the name of register X to FILE based on its machine mode and number.
15129 If CODE is 'w', pretend the mode is HImode.
15130 If CODE is 'b', pretend the mode is QImode.
15131 If CODE is 'k', pretend the mode is SImode.
15132 If CODE is 'q', pretend the mode is DImode.
15133 If CODE is 'x', pretend the mode is V4SFmode.
15134 If CODE is 't', pretend the mode is V8SFmode.
15135 If CODE is 'g', pretend the mode is V16SFmode.
15136 If CODE is 'h', pretend the reg is the 'high' byte register.
15137 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
15138 If CODE is 'd', duplicate the operand for AVX instruction.
15141 void
15142 print_reg (rtx x, int code, FILE *file)
15144 const char *reg;
15145 int msize;
15146 unsigned int regno;
15147 bool duplicated;
15149 if (ASSEMBLER_DIALECT == ASM_ATT)
15150 putc ('%', file);
15152 if (x == pc_rtx)
15154 gcc_assert (TARGET_64BIT);
15155 fputs ("rip", file);
15156 return;
15159 if (code == 'y' && STACK_TOP_P (x))
15161 fputs ("st(0)", file);
15162 return;
15165 if (code == 'w')
15166 msize = 2;
15167 else if (code == 'b')
15168 msize = 1;
15169 else if (code == 'k')
15170 msize = 4;
15171 else if (code == 'q')
15172 msize = 8;
15173 else if (code == 'h')
15174 msize = 0;
15175 else if (code == 'x')
15176 msize = 16;
15177 else if (code == 't')
15178 msize = 32;
15179 else if (code == 'g')
15180 msize = 64;
15181 else
15182 msize = GET_MODE_SIZE (GET_MODE (x));
15184 regno = true_regnum (x);
15186 gcc_assert (regno != ARG_POINTER_REGNUM
15187 && regno != FRAME_POINTER_REGNUM
15188 && regno != FLAGS_REG
15189 && regno != FPSR_REG
15190 && regno != FPCR_REG);
15192 duplicated = code == 'd' && TARGET_AVX;
15194 switch (msize)
15196 case 8:
15197 case 4:
15198 if (LEGACY_INT_REGNO_P (regno))
15199 putc (msize == 8 ? 'r' : 'e', file);
15200 case 16:
15201 case 12:
15202 case 2:
15203 normal:
15204 reg = hi_reg_name[regno];
15205 break;
15206 case 1:
15207 if (regno >= ARRAY_SIZE (qi_reg_name))
15208 goto normal;
15209 reg = qi_reg_name[regno];
15210 break;
15211 case 0:
15212 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15213 goto normal;
15214 reg = qi_high_reg_name[regno];
15215 break;
15216 case 32:
15217 case 64:
15218 if (SSE_REGNO_P (regno))
15220 gcc_assert (!duplicated);
15221 putc (msize == 32 ? 'y' : 'z', file);
15222 reg = hi_reg_name[regno] + 1;
15223 break;
15225 goto normal;
15226 default:
15227 gcc_unreachable ();
15230 fputs (reg, file);
15232 /* Irritatingly, AMD extended registers use
15233 a different naming convention: "r%d[bwd]". */
15234 if (REX_INT_REGNO_P (regno))
15236 gcc_assert (TARGET_64BIT);
15237 switch (msize)
15239 case 0:
15240 error ("extended registers have no high halves");
15241 break;
15242 case 1:
15243 putc ('b', file);
15244 break;
15245 case 2:
15246 putc ('w', file);
15247 break;
15248 case 4:
15249 putc ('d', file);
15250 break;
15251 case 8:
15252 /* no suffix */
15253 break;
15254 default:
15255 error ("unsupported operand size for extended register");
15256 break;
15258 return;
15261 if (duplicated)
15263 if (ASSEMBLER_DIALECT == ASM_ATT)
15264 fprintf (file, ", %%%s", reg);
15265 else
15266 fprintf (file, ", %s", reg);
15270 /* Meaning of CODE:
15271 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15272 C -- print opcode suffix for set/cmov insn.
15273 c -- like C, but print reversed condition
15274 F,f -- likewise, but for floating-point.
15275 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15276 otherwise nothing
15277 R -- print embedded rounding and sae.
15278 r -- print only sae.
15279 z -- print the opcode suffix for the size of the current operand.
15280 Z -- likewise, with special suffixes for x87 instructions.
15281 * -- print a star (in certain assembler syntax)
15282 A -- print an absolute memory reference.
15283 E -- print address with DImode register names if TARGET_64BIT.
15284 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15285 s -- print a shift double count, followed by the assembler's argument
15286 delimiter.
15287 b -- print the QImode name of the register for the indicated operand.
15288 %b0 would print %al if operands[0] is reg 0.
15289 w -- likewise, print the HImode name of the register.
15290 k -- likewise, print the SImode name of the register.
15291 q -- likewise, print the DImode name of the register.
15292 x -- likewise, print the V4SFmode name of the register.
15293 t -- likewise, print the V8SFmode name of the register.
15294 g -- likewise, print the V16SFmode name of the register.
15295 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15296 y -- print "st(0)" instead of "st" as a register.
15297 d -- print duplicated register operand for AVX instruction.
15298 D -- print condition for SSE cmp instruction.
15299 P -- if PIC, print an @PLT suffix.
15300 p -- print raw symbol name.
15301 X -- don't print any sort of PIC '@' suffix for a symbol.
15302 & -- print some in-use local-dynamic symbol name.
15303 H -- print a memory address offset by 8; used for sse high-parts
15304 Y -- print condition for XOP pcom* instruction.
15305 + -- print a branch hint as 'cs' or 'ds' prefix
15306 ; -- print a semicolon (after prefixes, due to a bug in older gas).
15307 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15308 @ -- print a segment register of thread base pointer load
15309 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15310 ! -- print MPX prefix for jxx/call/ret instructions if required.
15313 void
15314 ix86_print_operand (FILE *file, rtx x, int code)
15316 if (code)
15318 switch (code)
15320 case 'A':
15321 switch (ASSEMBLER_DIALECT)
15323 case ASM_ATT:
15324 putc ('*', file);
15325 break;
15327 case ASM_INTEL:
15328 /* Intel syntax. For absolute addresses, registers should not
15329 be surrounded by braces. */
15330 if (!REG_P (x))
15332 putc ('[', file);
15333 ix86_print_operand (file, x, 0);
15334 putc (']', file);
15335 return;
15337 break;
15339 default:
15340 gcc_unreachable ();
15343 ix86_print_operand (file, x, 0);
15344 return;
15346 case 'E':
15347 /* Wrap address in an UNSPEC to declare special handling. */
15348 if (TARGET_64BIT)
15349 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15351 output_address (x);
15352 return;
15354 case 'L':
15355 if (ASSEMBLER_DIALECT == ASM_ATT)
15356 putc ('l', file);
15357 return;
15359 case 'W':
15360 if (ASSEMBLER_DIALECT == ASM_ATT)
15361 putc ('w', file);
15362 return;
15364 case 'B':
15365 if (ASSEMBLER_DIALECT == ASM_ATT)
15366 putc ('b', file);
15367 return;
15369 case 'Q':
15370 if (ASSEMBLER_DIALECT == ASM_ATT)
15371 putc ('l', file);
15372 return;
15374 case 'S':
15375 if (ASSEMBLER_DIALECT == ASM_ATT)
15376 putc ('s', file);
15377 return;
15379 case 'T':
15380 if (ASSEMBLER_DIALECT == ASM_ATT)
15381 putc ('t', file);
15382 return;
15384 case 'O':
15385 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15386 if (ASSEMBLER_DIALECT != ASM_ATT)
15387 return;
15389 switch (GET_MODE_SIZE (GET_MODE (x)))
15391 case 2:
15392 putc ('w', file);
15393 break;
15395 case 4:
15396 putc ('l', file);
15397 break;
15399 case 8:
15400 putc ('q', file);
15401 break;
15403 default:
15404 output_operand_lossage
15405 ("invalid operand size for operand code 'O'");
15406 return;
15409 putc ('.', file);
15410 #endif
15411 return;
15413 case 'z':
15414 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15416 /* Opcodes don't get size suffixes if using Intel opcodes. */
15417 if (ASSEMBLER_DIALECT == ASM_INTEL)
15418 return;
15420 switch (GET_MODE_SIZE (GET_MODE (x)))
15422 case 1:
15423 putc ('b', file);
15424 return;
15426 case 2:
15427 putc ('w', file);
15428 return;
15430 case 4:
15431 putc ('l', file);
15432 return;
15434 case 8:
15435 putc ('q', file);
15436 return;
15438 default:
15439 output_operand_lossage
15440 ("invalid operand size for operand code 'z'");
15441 return;
15445 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15446 warning
15447 (0, "non-integer operand used with operand code 'z'");
15448 /* FALLTHRU */
15450 case 'Z':
15451 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
15452 if (ASSEMBLER_DIALECT == ASM_INTEL)
15453 return;
15455 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15457 switch (GET_MODE_SIZE (GET_MODE (x)))
15459 case 2:
15460 #ifdef HAVE_AS_IX86_FILDS
15461 putc ('s', file);
15462 #endif
15463 return;
15465 case 4:
15466 putc ('l', file);
15467 return;
15469 case 8:
15470 #ifdef HAVE_AS_IX86_FILDQ
15471 putc ('q', file);
15472 #else
15473 fputs ("ll", file);
15474 #endif
15475 return;
15477 default:
15478 break;
15481 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15483 /* 387 opcodes don't get size suffixes
15484 if the operands are registers. */
15485 if (STACK_REG_P (x))
15486 return;
15488 switch (GET_MODE_SIZE (GET_MODE (x)))
15490 case 4:
15491 putc ('s', file);
15492 return;
15494 case 8:
15495 putc ('l', file);
15496 return;
15498 case 12:
15499 case 16:
15500 putc ('t', file);
15501 return;
15503 default:
15504 break;
15507 else
15509 output_operand_lossage
15510 ("invalid operand type used with operand code 'Z'");
15511 return;
15514 output_operand_lossage
15515 ("invalid operand size for operand code 'Z'");
15516 return;
15518 case 'd':
15519 case 'b':
15520 case 'w':
15521 case 'k':
15522 case 'q':
15523 case 'h':
15524 case 't':
15525 case 'g':
15526 case 'y':
15527 case 'x':
15528 case 'X':
15529 case 'P':
15530 case 'p':
15531 break;
15533 case 's':
15534 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15536 ix86_print_operand (file, x, 0);
15537 fputs (", ", file);
15539 return;
15541 case 'Y':
15542 switch (GET_CODE (x))
15544 case NE:
15545 fputs ("neq", file);
15546 break;
15547 case EQ:
15548 fputs ("eq", file);
15549 break;
15550 case GE:
15551 case GEU:
15552 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15553 break;
15554 case GT:
15555 case GTU:
15556 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15557 break;
15558 case LE:
15559 case LEU:
15560 fputs ("le", file);
15561 break;
15562 case LT:
15563 case LTU:
15564 fputs ("lt", file);
15565 break;
15566 case UNORDERED:
15567 fputs ("unord", file);
15568 break;
15569 case ORDERED:
15570 fputs ("ord", file);
15571 break;
15572 case UNEQ:
15573 fputs ("ueq", file);
15574 break;
15575 case UNGE:
15576 fputs ("nlt", file);
15577 break;
15578 case UNGT:
15579 fputs ("nle", file);
15580 break;
15581 case UNLE:
15582 fputs ("ule", file);
15583 break;
15584 case UNLT:
15585 fputs ("ult", file);
15586 break;
15587 case LTGT:
15588 fputs ("une", file);
15589 break;
15590 default:
15591 output_operand_lossage ("operand is not a condition code, "
15592 "invalid operand code 'Y'");
15593 return;
15595 return;
15597 case 'D':
15598 /* A little bit of brain damage here.  The SSE compare instructions
15599 use completely different names for the comparisons than the
15600 fp conditional moves do. */
15601 switch (GET_CODE (x))
15603 case UNEQ:
15604 if (TARGET_AVX)
15606 fputs ("eq_us", file);
15607 break;
15609 case EQ:
15610 fputs ("eq", file);
15611 break;
15612 case UNLT:
15613 if (TARGET_AVX)
15615 fputs ("nge", file);
15616 break;
15618 case LT:
15619 fputs ("lt", file);
15620 break;
15621 case UNLE:
15622 if (TARGET_AVX)
15624 fputs ("ngt", file);
15625 break;
15627 case LE:
15628 fputs ("le", file);
15629 break;
15630 case UNORDERED:
15631 fputs ("unord", file);
15632 break;
15633 case LTGT:
15634 if (TARGET_AVX)
15636 fputs ("neq_oq", file);
15637 break;
15639 case NE:
15640 fputs ("neq", file);
15641 break;
15642 case GE:
15643 if (TARGET_AVX)
15645 fputs ("ge", file);
15646 break;
15648 case UNGE:
15649 fputs ("nlt", file);
15650 break;
15651 case GT:
15652 if (TARGET_AVX)
15654 fputs ("gt", file);
15655 break;
15657 case UNGT:
15658 fputs ("nle", file);
15659 break;
15660 case ORDERED:
15661 fputs ("ord", file);
15662 break;
15663 default:
15664 output_operand_lossage ("operand is not a condition code, "
15665 "invalid operand code 'D'");
15666 return;
15668 return;
15670 case 'F':
15671 case 'f':
15672 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15673 if (ASSEMBLER_DIALECT == ASM_ATT)
15674 putc ('.', file);
15675 #endif
15677 case 'C':
15678 case 'c':
15679 if (!COMPARISON_P (x))
15681 output_operand_lossage ("operand is not a condition code, "
15682 "invalid operand code '%c'", code);
15683 return;
15685 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15686 code == 'c' || code == 'f',
15687 code == 'F' || code == 'f',
15688 file);
15689 return;
15691 case 'H':
15692 if (!offsettable_memref_p (x))
15694 output_operand_lossage ("operand is not an offsettable memory "
15695 "reference, invalid operand code 'H'");
15696 return;
15698 /* It doesn't actually matter what mode we use here, as we're
15699 only going to use this for printing. */
15700 x = adjust_address_nv (x, DImode, 8);
15701 /* Output 'qword ptr' for intel assembler dialect. */
15702 if (ASSEMBLER_DIALECT == ASM_INTEL)
15703 code = 'q';
15704 break;
15706 case 'K':
15707 gcc_assert (CONST_INT_P (x));
15709 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15710 #ifdef HAVE_AS_IX86_HLE
15711 fputs ("xacquire ", file);
15712 #else
15713 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15714 #endif
15715 else if (INTVAL (x) & IX86_HLE_RELEASE)
15716 #ifdef HAVE_AS_IX86_HLE
15717 fputs ("xrelease ", file);
15718 #else
15719 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15720 #endif
15721 /* We do not want to print the value of the operand. */
15722 return;
15724 case 'N':
15725 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15726 fputs ("{z}", file);
15727 return;
15729 case 'r':
15730 gcc_assert (CONST_INT_P (x));
15731 gcc_assert (INTVAL (x) == ROUND_SAE);
15733 if (ASSEMBLER_DIALECT == ASM_INTEL)
15734 fputs (", ", file);
15736 fputs ("{sae}", file);
15738 if (ASSEMBLER_DIALECT == ASM_ATT)
15739 fputs (", ", file);
15741 return;
15743 case 'R':
15744 gcc_assert (CONST_INT_P (x));
15746 if (ASSEMBLER_DIALECT == ASM_INTEL)
15747 fputs (", ", file);
15749 switch (INTVAL (x))
15751 case ROUND_NEAREST_INT | ROUND_SAE:
15752 fputs ("{rn-sae}", file);
15753 break;
15754 case ROUND_NEG_INF | ROUND_SAE:
15755 fputs ("{rd-sae}", file);
15756 break;
15757 case ROUND_POS_INF | ROUND_SAE:
15758 fputs ("{ru-sae}", file);
15759 break;
15760 case ROUND_ZERO | ROUND_SAE:
15761 fputs ("{rz-sae}", file);
15762 break;
15763 default:
15764 gcc_unreachable ();
15767 if (ASSEMBLER_DIALECT == ASM_ATT)
15768 fputs (", ", file);
15770 return;
15772 case '*':
15773 if (ASSEMBLER_DIALECT == ASM_ATT)
15774 putc ('*', file);
15775 return;
15777 case '&':
15779 const char *name = get_some_local_dynamic_name ();
15780 if (name == NULL)
15781 output_operand_lossage ("'%%&' used without any "
15782 "local dynamic TLS references");
15783 else
15784 assemble_name (file, name);
15785 return;
15788 case '+':
15790 rtx x;
15792 if (!optimize
15793 || optimize_function_for_size_p (cfun)
15794 || !TARGET_BRANCH_PREDICTION_HINTS)
15795 return;
15797 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15798 if (x)
15800 int pred_val = XINT (x, 0);
15802 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15803 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15805 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15806 bool cputaken
15807 = final_forward_branch_p (current_output_insn) == 0;
15809 /* Emit hints only in cases where the default branch prediction
15810 heuristics would fail. */
15811 if (taken != cputaken)
15813 /* We use 3e (DS) prefix for taken branches and
15814 2e (CS) prefix for not taken branches. */
15815 if (taken)
15816 fputs ("ds ; ", file);
15817 else
15818 fputs ("cs ; ", file);
15822 return;
15825 case ';':
15826 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15827 putc (';', file);
15828 #endif
15829 return;
15831 case '@':
15832 if (ASSEMBLER_DIALECT == ASM_ATT)
15833 putc ('%', file);
15835 /* The kernel uses a different segment register for performance
15836 reasons; a system call would not have to trash the userspace
15837 segment register, which would be expensive. */
15838 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15839 fputs ("fs", file);
15840 else
15841 fputs ("gs", file);
15842 return;
15844 case '~':
15845 putc (TARGET_AVX2 ? 'i' : 'f', file);
15846 return;
15848 case '^':
15849 if (TARGET_64BIT && Pmode != word_mode)
15850 fputs ("addr32 ", file);
15851 return;
15853 case '!':
15854 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15855 fputs ("bnd ", file);
15856 return;
15858 default:
15859 output_operand_lossage ("invalid operand code '%c'", code);
15863 if (REG_P (x))
15864 print_reg (x, code, file);
15866 else if (MEM_P (x))
15868 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15869 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15870 && GET_MODE (x) != BLKmode)
15872 const char * size;
15873 switch (GET_MODE_SIZE (GET_MODE (x)))
15875 case 1: size = "BYTE"; break;
15876 case 2: size = "WORD"; break;
15877 case 4: size = "DWORD"; break;
15878 case 8: size = "QWORD"; break;
15879 case 12: size = "TBYTE"; break;
15880 case 16:
15881 if (GET_MODE (x) == XFmode)
15882 size = "TBYTE";
15883 else
15884 size = "XMMWORD";
15885 break;
15886 case 32: size = "YMMWORD"; break;
15887 case 64: size = "ZMMWORD"; break;
15888 default:
15889 gcc_unreachable ();
15892 /* Check for explicit size override (codes 'b', 'w', 'k',
15893 'q' and 'x') */
15894 if (code == 'b')
15895 size = "BYTE";
15896 else if (code == 'w')
15897 size = "WORD";
15898 else if (code == 'k')
15899 size = "DWORD";
15900 else if (code == 'q')
15901 size = "QWORD";
15902 else if (code == 'x')
15903 size = "XMMWORD";
15905 fputs (size, file);
15906 fputs (" PTR ", file);
15909 x = XEXP (x, 0);
15910 /* Avoid (%rip) for call operands. */
15911 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15912 && !CONST_INT_P (x))
15913 output_addr_const (file, x);
15914 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15915 output_operand_lossage ("invalid constraints for operand");
15916 else
15917 output_address (x);
15920 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
15922 REAL_VALUE_TYPE r;
15923 long l;
15925 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15926 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15928 if (ASSEMBLER_DIALECT == ASM_ATT)
15929 putc ('$', file);
15930 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15931 if (code == 'q')
15932 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15933 (unsigned long long) (int) l);
15934 else
15935 fprintf (file, "0x%08x", (unsigned int) l);
15938 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
15940 REAL_VALUE_TYPE r;
15941 long l[2];
15943 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15944 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15946 if (ASSEMBLER_DIALECT == ASM_ATT)
15947 putc ('$', file);
15948 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15951 /* These float cases don't actually occur as immediate operands. */
15952 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
15954 char dstr[30];
15956 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15957 fputs (dstr, file);
15960 else
15962 /* We have patterns that allow zero sets of memory, for instance.
15963 In 64-bit mode, we should probably support all 8-byte vectors,
15964 since we can in fact encode that into an immediate. */
15965 if (GET_CODE (x) == CONST_VECTOR)
15967 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15968 x = const0_rtx;
15971 if (code != 'P' && code != 'p')
15973 if (CONST_INT_P (x))
15975 if (ASSEMBLER_DIALECT == ASM_ATT)
15976 putc ('$', file);
15978 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15979 || GET_CODE (x) == LABEL_REF)
15981 if (ASSEMBLER_DIALECT == ASM_ATT)
15982 putc ('$', file);
15983 else
15984 fputs ("OFFSET FLAT:", file);
15987 if (CONST_INT_P (x))
15988 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15989 else if (flag_pic || MACHOPIC_INDIRECT)
15990 output_pic_addr_const (file, x, code);
15991 else
15992 output_addr_const (file, x);
15996 static bool
15997 ix86_print_operand_punct_valid_p (unsigned char code)
15999 return (code == '@' || code == '*' || code == '+' || code == '&'
16000 || code == ';' || code == '~' || code == '^' || code == '!');
16003 /* Print a memory operand whose address is ADDR. */
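/* Added note: the two assembler dialects format the same address quite
   differently; e.g. base+index*4+disp prints as "disp(%base,%index,4)" in
   AT&T syntax and as "[base+index*4+disp]" in Intel syntax, which is what
   the two branches at the end of this function produce.  */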
16005 static void
16006 ix86_print_operand_address (FILE *file, rtx addr)
16008 struct ix86_address parts;
16009 rtx base, index, disp;
16010 int scale;
16011 int ok;
16012 bool vsib = false;
16013 int code = 0;
16015 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
16017 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16018 gcc_assert (parts.index == NULL_RTX);
16019 parts.index = XVECEXP (addr, 0, 1);
16020 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
16021 addr = XVECEXP (addr, 0, 0);
16022 vsib = true;
16024 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
16026 gcc_assert (TARGET_64BIT);
16027 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16028 code = 'q';
16030 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
16032 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
16033 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
16034 if (parts.base != NULL_RTX)
16036 parts.index = parts.base;
16037 parts.scale = 1;
16039 parts.base = XVECEXP (addr, 0, 0);
16040 addr = XVECEXP (addr, 0, 0);
16042 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
16044 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16045 gcc_assert (parts.index == NULL_RTX);
16046 parts.index = XVECEXP (addr, 0, 1);
16047 addr = XVECEXP (addr, 0, 0);
16049 else
16050 ok = ix86_decompose_address (addr, &parts);
16052 gcc_assert (ok);
16054 base = parts.base;
16055 index = parts.index;
16056 disp = parts.disp;
16057 scale = parts.scale;
16059 switch (parts.seg)
16061 case SEG_DEFAULT:
16062 break;
16063 case SEG_FS:
16064 case SEG_GS:
16065 if (ASSEMBLER_DIALECT == ASM_ATT)
16066 putc ('%', file);
16067 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16068 break;
16069 default:
16070 gcc_unreachable ();
16073 /* Use one byte shorter RIP relative addressing for 64bit mode. */
16074 if (TARGET_64BIT && !base && !index)
16076 rtx symbol = disp;
16078 if (GET_CODE (disp) == CONST
16079 && GET_CODE (XEXP (disp, 0)) == PLUS
16080 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16081 symbol = XEXP (XEXP (disp, 0), 0);
16083 if (GET_CODE (symbol) == LABEL_REF
16084 || (GET_CODE (symbol) == SYMBOL_REF
16085 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16086 base = pc_rtx;
16088 if (!base && !index)
16090 /* A displacement-only address requires special attention. */
16092 if (CONST_INT_P (disp))
16094 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16095 fputs ("ds:", file);
16096 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16098 else if (flag_pic)
16099 output_pic_addr_const (file, disp, 0);
16100 else
16101 output_addr_const (file, disp);
16103 else
16105 /* Print SImode register names to force addr32 prefix. */
16106 if (SImode_address_operand (addr, VOIDmode))
16108 #ifdef ENABLE_CHECKING
16109 gcc_assert (TARGET_64BIT);
16110 switch (GET_CODE (addr))
16112 case SUBREG:
16113 gcc_assert (GET_MODE (addr) == SImode);
16114 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16115 break;
16116 case ZERO_EXTEND:
16117 case AND:
16118 gcc_assert (GET_MODE (addr) == DImode);
16119 break;
16120 default:
16121 gcc_unreachable ();
16123 #endif
16124 gcc_assert (!code);
16125 code = 'k';
16127 else if (code == 0
16128 && TARGET_X32
16129 && disp
16130 && CONST_INT_P (disp)
16131 && INTVAL (disp) < -16*1024*1024)
16133 /* X32 runs in 64-bit mode, where displacement, DISP, in
16134 address DISP(%r64), is encoded as 32-bit immediate sign-
16135 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16136 address is %r64 + 0xffffffffbffffd00. When %r64 <
16137 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16138 which is invalid for x32. The correct address is %r64
16139 - 0x40000300 == 0xf7ffdd64. To properly encode
16140 -0x40000300(%r64) for x32, we zero-extend negative
16141 displacement by forcing addr32 prefix which truncates
16142 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16143 zero-extend all negative displacements, including -1(%rsp).
16144 However, for small negative displacements, sign-extension
16145 won't cause overflow. We only zero-extend negative
16146 displacements if they are < -16*1024*1024, the same bound that is
16147 also used to check legitimate address displacements for PIC. */
16148 code = 'k';
16151 if (ASSEMBLER_DIALECT == ASM_ATT)
16153 if (disp)
16155 if (flag_pic)
16156 output_pic_addr_const (file, disp, 0);
16157 else if (GET_CODE (disp) == LABEL_REF)
16158 output_asm_label (disp);
16159 else
16160 output_addr_const (file, disp);
16163 putc ('(', file);
16164 if (base)
16165 print_reg (base, code, file);
16166 if (index)
16168 putc (',', file);
16169 print_reg (index, vsib ? 0 : code, file);
16170 if (scale != 1 || vsib)
16171 fprintf (file, ",%d", scale);
16173 putc (')', file);
16175 else
16177 rtx offset = NULL_RTX;
16179 if (disp)
16181 /* Pull out the offset of a symbol; print any symbol itself. */
16182 if (GET_CODE (disp) == CONST
16183 && GET_CODE (XEXP (disp, 0)) == PLUS
16184 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16186 offset = XEXP (XEXP (disp, 0), 1);
16187 disp = gen_rtx_CONST (VOIDmode,
16188 XEXP (XEXP (disp, 0), 0));
16191 if (flag_pic)
16192 output_pic_addr_const (file, disp, 0);
16193 else if (GET_CODE (disp) == LABEL_REF)
16194 output_asm_label (disp);
16195 else if (CONST_INT_P (disp))
16196 offset = disp;
16197 else
16198 output_addr_const (file, disp);
16201 putc ('[', file);
16202 if (base)
16204 print_reg (base, code, file);
16205 if (offset)
16207 if (INTVAL (offset) >= 0)
16208 putc ('+', file);
16209 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16212 else if (offset)
16213 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16214 else
16215 putc ('0', file);
16217 if (index)
16219 putc ('+', file);
16220 print_reg (index, vsib ? 0 : code, file);
16221 if (scale != 1 || vsib)
16222 fprintf (file, "*%d", scale);
16224 putc (']', file);
16229 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
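/* Added illustration: given (unspec [(symbol_ref "foo")] UNSPEC_TPOFF) this
   hook prints "foo@tpoff"; the other TLS unspecs map to their corresponding
   relocation suffixes in the same way, and unrecognized unspecs make the
   hook return false.  */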
16231 static bool
16232 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16234 rtx op;
16236 if (GET_CODE (x) != UNSPEC)
16237 return false;
16239 op = XVECEXP (x, 0, 0);
16240 switch (XINT (x, 1))
16242 case UNSPEC_GOTTPOFF:
16243 output_addr_const (file, op);
16244 /* FIXME: This might be @TPOFF in Sun ld. */
16245 fputs ("@gottpoff", file);
16246 break;
16247 case UNSPEC_TPOFF:
16248 output_addr_const (file, op);
16249 fputs ("@tpoff", file);
16250 break;
16251 case UNSPEC_NTPOFF:
16252 output_addr_const (file, op);
16253 if (TARGET_64BIT)
16254 fputs ("@tpoff", file);
16255 else
16256 fputs ("@ntpoff", file);
16257 break;
16258 case UNSPEC_DTPOFF:
16259 output_addr_const (file, op);
16260 fputs ("@dtpoff", file);
16261 break;
16262 case UNSPEC_GOTNTPOFF:
16263 output_addr_const (file, op);
16264 if (TARGET_64BIT)
16265 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16266 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16267 else
16268 fputs ("@gotntpoff", file);
16269 break;
16270 case UNSPEC_INDNTPOFF:
16271 output_addr_const (file, op);
16272 fputs ("@indntpoff", file);
16273 break;
16274 #if TARGET_MACHO
16275 case UNSPEC_MACHOPIC_OFFSET:
16276 output_addr_const (file, op);
16277 putc ('-', file);
16278 machopic_output_function_base_name (file);
16279 break;
16280 #endif
16282 case UNSPEC_STACK_CHECK:
16284 int offset;
16286 gcc_assert (flag_split_stack);
16288 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16289 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16290 #else
16291 gcc_unreachable ();
16292 #endif
16294 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16296 break;
16298 default:
16299 return false;
16302 return true;
16305 /* Split one or more double-mode RTL references into pairs of half-mode
16306 references. The RTL can be REG, offsettable MEM, integer constant, or
16307 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16308 split and "num" is its length. lo_half and hi_half are output arrays
16309 that parallel "operands". */
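/* Added usage sketch (assumed, not from the original sources): to split a
   single DImode operand on a 32-bit target one would write

     rtx lo[1], hi[1];
     split_double_mode (DImode, &operands[0], 1, lo, hi);

   after which lo[0] and hi[0] hold the SImode low and high halves.  */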
16311 void
16312 split_double_mode (machine_mode mode, rtx operands[],
16313 int num, rtx lo_half[], rtx hi_half[])
16315 machine_mode half_mode;
16316 unsigned int byte;
16318 switch (mode)
16320 case TImode:
16321 half_mode = DImode;
16322 break;
16323 case DImode:
16324 half_mode = SImode;
16325 break;
16326 default:
16327 gcc_unreachable ();
16330 byte = GET_MODE_SIZE (half_mode);
16332 while (num--)
16334 rtx op = operands[num];
16336 /* simplify_subreg refuses to split volatile memory addresses,
16337 but we still have to handle them here. */
16338 if (MEM_P (op))
16340 lo_half[num] = adjust_address (op, half_mode, 0);
16341 hi_half[num] = adjust_address (op, half_mode, byte);
16343 else
16345 lo_half[num] = simplify_gen_subreg (half_mode, op,
16346 GET_MODE (op) == VOIDmode
16347 ? mode : GET_MODE (op), 0);
16348 hi_half[num] = simplify_gen_subreg (half_mode, op,
16349 GET_MODE (op) == VOIDmode
16350 ? mode : GET_MODE (op), byte);
16355 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16356 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16357 is the expression of the binary operation. The output may either be
16358 emitted here, or returned to the caller, like all output_* functions.
16360 There is no guarantee that the operands are the same mode, as they
16361 might be within FLOAT or FLOAT_EXTEND expressions. */
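/* Added illustration: for an SSE (non-AVX) SFmode addition the template
   built below is "addss\t{%2, %0|%0, %2}", and with AVX it becomes
   "vaddss\t{%2, %1, %0|%0, %1, %2}"; the x87 paths instead pick between
   fadd/fiadd and the pop/reverse forms depending on which operand dies.  */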
16363 #ifndef SYSV386_COMPAT
16364 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16365 wants to fix the assemblers because that causes incompatibility
16366 with gcc. No-one wants to fix gcc because that causes
16367 incompatibility with assemblers... You can use the option of
16368 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16369 #define SYSV386_COMPAT 1
16370 #endif
16372 const char *
16373 output_387_binary_op (rtx insn, rtx *operands)
16375 static char buf[40];
16376 const char *p;
16377 const char *ssep;
16378 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16380 #ifdef ENABLE_CHECKING
16381 /* Even if we do not want to check the inputs, this documents the input
16382 constraints, which helps in understanding the following code. */
16383 if (STACK_REG_P (operands[0])
16384 && ((REG_P (operands[1])
16385 && REGNO (operands[0]) == REGNO (operands[1])
16386 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16387 || (REG_P (operands[2])
16388 && REGNO (operands[0]) == REGNO (operands[2])
16389 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16390 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16391 ; /* ok */
16392 else
16393 gcc_assert (is_sse);
16394 #endif
16396 switch (GET_CODE (operands[3]))
16398 case PLUS:
16399 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16400 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16401 p = "fiadd";
16402 else
16403 p = "fadd";
16404 ssep = "vadd";
16405 break;
16407 case MINUS:
16408 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16409 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16410 p = "fisub";
16411 else
16412 p = "fsub";
16413 ssep = "vsub";
16414 break;
16416 case MULT:
16417 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16418 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16419 p = "fimul";
16420 else
16421 p = "fmul";
16422 ssep = "vmul";
16423 break;
16425 case DIV:
16426 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16427 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16428 p = "fidiv";
16429 else
16430 p = "fdiv";
16431 ssep = "vdiv";
16432 break;
16434 default:
16435 gcc_unreachable ();
16438 if (is_sse)
16440 if (TARGET_AVX)
16442 strcpy (buf, ssep);
16443 if (GET_MODE (operands[0]) == SFmode)
16444 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16445 else
16446 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16448 else
16450 strcpy (buf, ssep + 1);
16451 if (GET_MODE (operands[0]) == SFmode)
16452 strcat (buf, "ss\t{%2, %0|%0, %2}");
16453 else
16454 strcat (buf, "sd\t{%2, %0|%0, %2}");
16456 return buf;
16458 strcpy (buf, p);
16460 switch (GET_CODE (operands[3]))
16462 case MULT:
16463 case PLUS:
16464 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16465 std::swap (operands[1], operands[2]);
16467 /* We now know operands[0] == operands[1]. */
16469 if (MEM_P (operands[2]))
16471 p = "%Z2\t%2";
16472 break;
16475 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16477 if (STACK_TOP_P (operands[0]))
16478 /* How is it that we are storing to a dead operand[2]?
16479 Well, presumably operands[1] is dead too. We can't
16480 store the result to st(0) as st(0) gets popped on this
16481 instruction. Instead store to operands[2] (which I
16482 think has to be st(1)). st(1) will be popped later.
16483 gcc <= 2.8.1 didn't have this check and generated
16484 assembly code that the Unixware assembler rejected. */
16485 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16486 else
16487 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16488 break;
16491 if (STACK_TOP_P (operands[0]))
16492 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16493 else
16494 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16495 break;
16497 case MINUS:
16498 case DIV:
16499 if (MEM_P (operands[1]))
16501 p = "r%Z1\t%1";
16502 break;
16505 if (MEM_P (operands[2]))
16507 p = "%Z2\t%2";
16508 break;
16511 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16513 #if SYSV386_COMPAT
16514 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16515 derived assemblers, confusingly reverse the direction of
16516 the operation for fsub{r} and fdiv{r} when the
16517 destination register is not st(0). The Intel assembler
16518 doesn't have this brain damage. Read !SYSV386_COMPAT to
16519 figure out what the hardware really does. */
16520 if (STACK_TOP_P (operands[0]))
16521 p = "{p\t%0, %2|rp\t%2, %0}";
16522 else
16523 p = "{rp\t%2, %0|p\t%0, %2}";
16524 #else
16525 if (STACK_TOP_P (operands[0]))
16526 /* As above for fmul/fadd, we can't store to st(0). */
16527 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16528 else
16529 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16530 #endif
16531 break;
16534 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16536 #if SYSV386_COMPAT
16537 if (STACK_TOP_P (operands[0]))
16538 p = "{rp\t%0, %1|p\t%1, %0}";
16539 else
16540 p = "{p\t%1, %0|rp\t%0, %1}";
16541 #else
16542 if (STACK_TOP_P (operands[0]))
16543 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16544 else
16545 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16546 #endif
16547 break;
16550 if (STACK_TOP_P (operands[0]))
16552 if (STACK_TOP_P (operands[1]))
16553 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16554 else
16555 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16556 break;
16558 else if (STACK_TOP_P (operands[1]))
16560 #if SYSV386_COMPAT
16561 p = "{\t%1, %0|r\t%0, %1}";
16562 #else
16563 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16564 #endif
16566 else
16568 #if SYSV386_COMPAT
16569 p = "{r\t%2, %0|\t%0, %2}";
16570 #else
16571 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16572 #endif
16574 break;
16576 default:
16577 gcc_unreachable ();
16580 strcat (buf, p);
16581 return buf;
16584 /* Check if a 256bit AVX register is referenced inside of EXP. */
16586 static bool
16587 ix86_check_avx256_register (const_rtx exp)
16589 if (GET_CODE (exp) == SUBREG)
16590 exp = SUBREG_REG (exp);
16592 return (REG_P (exp)
16593 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16596 /* Return needed mode for entity in optimize_mode_switching pass. */
16598 static int
16599 ix86_avx_u128_mode_needed (rtx_insn *insn)
16601 if (CALL_P (insn))
16603 rtx link;
16605 /* Needed mode is set to AVX_U128_CLEAN if there are
16606 no 256bit modes used in function arguments. */
16607 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16608 link;
16609 link = XEXP (link, 1))
16611 if (GET_CODE (XEXP (link, 0)) == USE)
16613 rtx arg = XEXP (XEXP (link, 0), 0);
16615 if (ix86_check_avx256_register (arg))
16616 return AVX_U128_DIRTY;
16620 return AVX_U128_CLEAN;
16623 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16624 changes state only when a 256bit register is written to, but we need
16625 to prevent the compiler from moving the optimal insertion point above
16626 an eventual read from a 256bit register. */
16627 subrtx_iterator::array_type array;
16628 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16629 if (ix86_check_avx256_register (*iter))
16630 return AVX_U128_DIRTY;
16632 return AVX_U128_ANY;
16635 /* Return mode that i387 must be switched into
16636 prior to the execution of insn. */
16638 static int
16639 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16641 enum attr_i387_cw mode;
16643 /* The mode UNINITIALIZED is used to store the control word after a
16644 function call or ASM pattern. The mode ANY specifies that the function
16645 has no requirements on the control word and makes no changes to the
16646 bits we are interested in. */
16648 if (CALL_P (insn)
16649 || (NONJUMP_INSN_P (insn)
16650 && (asm_noperands (PATTERN (insn)) >= 0
16651 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16652 return I387_CW_UNINITIALIZED;
16654 if (recog_memoized (insn) < 0)
16655 return I387_CW_ANY;
16657 mode = get_attr_i387_cw (insn);
16659 switch (entity)
16661 case I387_TRUNC:
16662 if (mode == I387_CW_TRUNC)
16663 return mode;
16664 break;
16666 case I387_FLOOR:
16667 if (mode == I387_CW_FLOOR)
16668 return mode;
16669 break;
16671 case I387_CEIL:
16672 if (mode == I387_CW_CEIL)
16673 return mode;
16674 break;
16676 case I387_MASK_PM:
16677 if (mode == I387_CW_MASK_PM)
16678 return mode;
16679 break;
16681 default:
16682 gcc_unreachable ();
16685 return I387_CW_ANY;
16688 /* Return mode that entity must be switched into
16689 prior to the execution of insn. */
16691 static int
16692 ix86_mode_needed (int entity, rtx_insn *insn)
16694 switch (entity)
16696 case AVX_U128:
16697 return ix86_avx_u128_mode_needed (insn);
16698 case I387_TRUNC:
16699 case I387_FLOOR:
16700 case I387_CEIL:
16701 case I387_MASK_PM:
16702 return ix86_i387_mode_needed (entity, insn);
16703 default:
16704 gcc_unreachable ();
16706 return 0;
16709 /* Check if a 256bit AVX register is referenced in stores. */
16711 static void
16712 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16714 if (ix86_check_avx256_register (dest))
16716 bool *used = (bool *) data;
16717 *used = true;
16721 /* Calculate mode of upper 128bit AVX registers after the insn. */
16723 static int
16724 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16726 rtx pat = PATTERN (insn);
16728 if (vzeroupper_operation (pat, VOIDmode)
16729 || vzeroall_operation (pat, VOIDmode))
16730 return AVX_U128_CLEAN;
16732 /* We know that the state is clean after a CALL insn if the function
16733 does not return a value in a 256bit register. */
16734 if (CALL_P (insn))
16736 bool avx_reg256_found = false;
16737 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16739 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16742 /* Otherwise, return current mode. Remember that if insn
16743 references AVX 256bit registers, the mode was already changed
16744 to DIRTY from MODE_NEEDED. */
16745 return mode;
16748 /* Return the mode that an insn results in. */
16750 static int
16751 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16753 switch (entity)
16755 case AVX_U128:
16756 return ix86_avx_u128_mode_after (mode, insn);
16757 case I387_TRUNC:
16758 case I387_FLOOR:
16759 case I387_CEIL:
16760 case I387_MASK_PM:
16761 return mode;
16762 default:
16763 gcc_unreachable ();
16767 static int
16768 ix86_avx_u128_mode_entry (void)
16770 tree arg;
16772 /* Entry mode is set to AVX_U128_DIRTY if there are
16773 256bit modes used in function arguments. */
16774 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16775 arg = TREE_CHAIN (arg))
16777 rtx incoming = DECL_INCOMING_RTL (arg);
16779 if (incoming && ix86_check_avx256_register (incoming))
16780 return AVX_U128_DIRTY;
16783 return AVX_U128_CLEAN;
16786 /* Return a mode that ENTITY is assumed to be
16787 switched to at function entry. */
16789 static int
16790 ix86_mode_entry (int entity)
16792 switch (entity)
16794 case AVX_U128:
16795 return ix86_avx_u128_mode_entry ();
16796 case I387_TRUNC:
16797 case I387_FLOOR:
16798 case I387_CEIL:
16799 case I387_MASK_PM:
16800 return I387_CW_ANY;
16801 default:
16802 gcc_unreachable ();
16806 static int
16807 ix86_avx_u128_mode_exit (void)
16809 rtx reg = crtl->return_rtx;
16811 /* Exit mode is set to AVX_U128_DIRTY if there are
16812 256bit modes used in the function return register. */
16813 if (reg && ix86_check_avx256_register (reg))
16814 return AVX_U128_DIRTY;
16816 return AVX_U128_CLEAN;
16819 /* Return a mode that ENTITY is assumed to be
16820 switched to at function exit. */
16822 static int
16823 ix86_mode_exit (int entity)
16825 switch (entity)
16827 case AVX_U128:
16828 return ix86_avx_u128_mode_exit ();
16829 case I387_TRUNC:
16830 case I387_FLOOR:
16831 case I387_CEIL:
16832 case I387_MASK_PM:
16833 return I387_CW_ANY;
16834 default:
16835 gcc_unreachable ();
16839 static int
16840 ix86_mode_priority (int, int n)
16842 return n;
16845 /* Output code to initialize control word copies used by trunc?f?i and
16846 rounding patterns. STORED_MODE is set to the current control word,
16847 while NEW_MODE is set to the new control word. */
16849 static void
16850 emit_i387_cw_initialization (int mode)
16852 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16853 rtx new_mode;
16855 enum ix86_stack_slot slot;
16857 rtx reg = gen_reg_rtx (HImode);
16859 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16860 emit_move_insn (reg, copy_rtx (stored_mode));
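/* The constants below follow the x87 control word layout: bits 10-11
   form the rounding-control field (00 = nearest, 01 = down, 10 = up,
   11 = truncate) and bit 5 masks the precision exception. */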
16862 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16863 || optimize_insn_for_size_p ())
16865 switch (mode)
16867 case I387_CW_TRUNC:
16868 /* round toward zero (truncate) */
16869 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16870 slot = SLOT_CW_TRUNC;
16871 break;
16873 case I387_CW_FLOOR:
16874 /* round down toward -oo */
16875 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16876 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16877 slot = SLOT_CW_FLOOR;
16878 break;
16880 case I387_CW_CEIL:
16881 /* round up toward +oo */
16882 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16883 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16884 slot = SLOT_CW_CEIL;
16885 break;
16887 case I387_CW_MASK_PM:
16888 /* mask precision exception for nearbyint() */
16889 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16890 slot = SLOT_CW_MASK_PM;
16891 break;
16893 default:
16894 gcc_unreachable ();
16897 else
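/* Here movsi_insv_1 apparently writes an 8-bit immediate into bits
   8-15 of the control-word copy, so e.g. 0xc corresponds to the
   0x0c00 rounding-control value used above. */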
16899 switch (mode)
16901 case I387_CW_TRUNC:
16902 /* round toward zero (truncate) */
16903 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16904 slot = SLOT_CW_TRUNC;
16905 break;
16907 case I387_CW_FLOOR:
16908 /* round down toward -oo */
16909 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16910 slot = SLOT_CW_FLOOR;
16911 break;
16913 case I387_CW_CEIL:
16914 /* round up toward +oo */
16915 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16916 slot = SLOT_CW_CEIL;
16917 break;
16919 case I387_CW_MASK_PM:
16920 /* mask precision exception for nearbyint() */
16921 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16922 slot = SLOT_CW_MASK_PM;
16923 break;
16925 default:
16926 gcc_unreachable ();
16930 gcc_assert (slot < MAX_386_STACK_LOCALS);
16932 new_mode = assign_386_stack_local (HImode, slot);
16933 emit_move_insn (new_mode, reg);
16936 /* Emit vzeroupper. */
16938 void
16939 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16941 int i;
16943 /* Cancel automatic vzeroupper insertion if there are
16944 live call-saved SSE registers at the insertion point. */
16946 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16947 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16948 return;
16950 if (TARGET_64BIT)
16951 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16952 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16953 return;
16955 emit_insn (gen_avx_vzeroupper ());
16960 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE
16961 is the set of hard registers live at the point where the insn(s)
16962 are to be inserted. */
16964 static void
16965 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16966 HARD_REG_SET regs_live)
16968 switch (entity)
16970 case AVX_U128:
16971 if (mode == AVX_U128_CLEAN)
16972 ix86_avx_emit_vzeroupper (regs_live);
16973 break;
16974 case I387_TRUNC:
16975 case I387_FLOOR:
16976 case I387_CEIL:
16977 case I387_MASK_PM:
16978 if (mode != I387_CW_ANY
16979 && mode != I387_CW_UNINITIALIZED)
16980 emit_i387_cw_initialization (mode);
16981 break;
16982 default:
16983 gcc_unreachable ();
16987 /* Output code for INSN to convert a float to a signed int. OPERANDS
16988 are the insn operands. The output may be [HSD]Imode and the input
16989 operand may be [SDX]Fmode. */
16991 const char *
16992 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
16994 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16995 int dimode_p = GET_MODE (operands[0]) == DImode;
16996 int round_mode = get_attr_i387_cw (insn);
16998 /* Jump through a hoop or two for DImode, since the hardware has no
16999 non-popping instruction. We used to do this a different way, but
17000 that was somewhat fragile and broke with post-reload splitters. */
17001 if ((dimode_p || fisttp) && !stack_top_dies)
17002 output_asm_insn ("fld\t%y1", operands);
17004 gcc_assert (STACK_TOP_P (operands[1]));
17005 gcc_assert (MEM_P (operands[0]));
17006 gcc_assert (GET_MODE (operands[1]) != TFmode);
17008 if (fisttp)
17009 output_asm_insn ("fisttp%Z0\t%0", operands);
17010 else
17012 if (round_mode != I387_CW_ANY)
17013 output_asm_insn ("fldcw\t%3", operands);
17014 if (stack_top_dies || dimode_p)
17015 output_asm_insn ("fistp%Z0\t%0", operands);
17016 else
17017 output_asm_insn ("fist%Z0\t%0", operands);
17018 if (round_mode != I387_CW_ANY)
17019 output_asm_insn ("fldcw\t%2", operands);
17022 return "";
17025 /* Output code for x87 ffreep insn. The OPNO argument, which may only
17026 have the values zero or one, indicates the ffreep insn's operand
17027 from the OPERANDS array. */
17029 static const char *
17030 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
17032 if (TARGET_USE_FFREEP)
17033 #ifdef HAVE_AS_IX86_FFREEP
17034 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
17035 #else
17037 static char retval[32];
17038 int regno = REGNO (operands[opno]);
17040 gcc_assert (STACK_REGNO_P (regno));
17042 regno -= FIRST_STACK_REG;
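/* The assembler lacks the ffreep mnemonic here, so emit its encoding
   (0xdf 0xc0+i) directly; the 16-bit word 0xc?df is stored
   little-endian, producing exactly those two opcode bytes. */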
17044 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
17045 return retval;
17047 #endif
17049 return opno ? "fstp\t%y1" : "fstp\t%y0";
17053 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
17054 should be used. UNORDERED_P is true when fucom should be used. */
17056 const char *
17057 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17059 int stack_top_dies;
17060 rtx cmp_op0, cmp_op1;
17061 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17063 if (eflags_p)
17065 cmp_op0 = operands[0];
17066 cmp_op1 = operands[1];
17068 else
17070 cmp_op0 = operands[1];
17071 cmp_op1 = operands[2];
17074 if (is_sse)
17076 if (GET_MODE (operands[0]) == SFmode)
17077 if (unordered_p)
17078 return "%vucomiss\t{%1, %0|%0, %1}";
17079 else
17080 return "%vcomiss\t{%1, %0|%0, %1}";
17081 else
17082 if (unordered_p)
17083 return "%vucomisd\t{%1, %0|%0, %1}";
17084 else
17085 return "%vcomisd\t{%1, %0|%0, %1}";
17088 gcc_assert (STACK_TOP_P (cmp_op0));
17090 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17092 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17094 if (stack_top_dies)
17096 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17097 return output_387_ffreep (operands, 1);
17099 else
17100 return "ftst\n\tfnstsw\t%0";
17103 if (STACK_REG_P (cmp_op1)
17104 && stack_top_dies
17105 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17106 && REGNO (cmp_op1) != FIRST_STACK_REG)
17108 /* If both the top of the 387 stack and the other operand (also a
17109 stack register) die, then this must be a
17110 `fcompp' float compare. */
17112 if (eflags_p)
17114 /* There is no double popping fcomi variant. Fortunately,
17115 eflags is immune from the fstp's cc clobbering. */
17116 if (unordered_p)
17117 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17118 else
17119 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17120 return output_387_ffreep (operands, 0);
17122 else
17124 if (unordered_p)
17125 return "fucompp\n\tfnstsw\t%0";
17126 else
17127 return "fcompp\n\tfnstsw\t%0";
17130 else
17132 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
17134 static const char * const alt[16] =
17136 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17137 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17138 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17139 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17141 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17142 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17143 NULL,
17144 NULL,
17146 "fcomi\t{%y1, %0|%0, %y1}",
17147 "fcomip\t{%y1, %0|%0, %y1}",
17148 "fucomi\t{%y1, %0|%0, %y1}",
17149 "fucomip\t{%y1, %0|%0, %y1}",
17151 NULL,
17152 NULL,
17153 NULL,
17154 NULL
17157 int mask;
17158 const char *ret;
17160 mask = eflags_p << 3;
17161 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17162 mask |= unordered_p << 1;
17163 mask |= stack_top_dies;
17165 gcc_assert (mask < 16);
17166 ret = alt[mask];
17167 gcc_assert (ret);
17169 return ret;
17173 void
17174 ix86_output_addr_vec_elt (FILE *file, int value)
17176 const char *directive = ASM_LONG;
17178 #ifdef ASM_QUAD
17179 if (TARGET_LP64)
17180 directive = ASM_QUAD;
17181 #else
17182 gcc_assert (!TARGET_64BIT);
17183 #endif
17185 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17188 void
17189 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17191 const char *directive = ASM_LONG;
17193 #ifdef ASM_QUAD
17194 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17195 directive = ASM_QUAD;
17196 #else
17197 gcc_assert (!TARGET_64BIT);
17198 #endif
17199 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17200 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17201 fprintf (file, "%s%s%d-%s%d\n",
17202 directive, LPREFIX, value, LPREFIX, rel);
17203 else if (HAVE_AS_GOTOFF_IN_DATA)
17204 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17205 #if TARGET_MACHO
17206 else if (TARGET_MACHO)
17208 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17209 machopic_output_function_base_name (file);
17210 putc ('\n', file);
17212 #endif
17213 else
17214 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17215 GOT_SYMBOL_NAME, LPREFIX, value);
17218 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17219 for the target. */
17221 void
17222 ix86_expand_clear (rtx dest)
17224 rtx tmp;
17226 /* We play register width games, which are only valid after reload. */
17227 gcc_assert (reload_completed);
17229 /* Avoid HImode and its attendant prefix byte. */
17230 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17231 dest = gen_rtx_REG (SImode, REGNO (dest));
17232 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
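/* When xor is used instead of "mov $0" it clobbers the flags, so the
   set must be wrapped in a parallel with a FLAGS_REG clobber. */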
17234 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17236 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17237 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17240 emit_insn (tmp);
17243 /* X is an unchanging MEM. If it is a constant pool reference, return
17244 the constant pool rtx, else NULL. */
17247 maybe_get_pool_constant (rtx x)
17249 x = ix86_delegitimize_address (XEXP (x, 0));
17251 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17252 return get_pool_constant (x);
17254 return NULL_RTX;
17257 void
17258 ix86_expand_move (machine_mode mode, rtx operands[])
17260 rtx op0, op1;
17261 enum tls_model model;
17263 op0 = operands[0];
17264 op1 = operands[1];
17266 if (GET_CODE (op1) == SYMBOL_REF)
17268 rtx tmp;
17270 model = SYMBOL_REF_TLS_MODEL (op1);
17271 if (model)
17273 op1 = legitimize_tls_address (op1, model, true);
17274 op1 = force_operand (op1, op0);
17275 if (op1 == op0)
17276 return;
17277 op1 = convert_to_mode (mode, op1, 1);
17279 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17280 op1 = tmp;
17282 else if (GET_CODE (op1) == CONST
17283 && GET_CODE (XEXP (op1, 0)) == PLUS
17284 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17286 rtx addend = XEXP (XEXP (op1, 0), 1);
17287 rtx symbol = XEXP (XEXP (op1, 0), 0);
17288 rtx tmp;
17290 model = SYMBOL_REF_TLS_MODEL (symbol);
17291 if (model)
17292 tmp = legitimize_tls_address (symbol, model, true);
17293 else
17294 tmp = legitimize_pe_coff_symbol (symbol, true);
17296 if (tmp)
17298 tmp = force_operand (tmp, NULL);
17299 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17300 op0, 1, OPTAB_DIRECT);
17301 if (tmp == op0)
17302 return;
17303 op1 = convert_to_mode (mode, tmp, 1);
17307 if ((flag_pic || MACHOPIC_INDIRECT)
17308 && symbolic_operand (op1, mode))
17310 if (TARGET_MACHO && !TARGET_64BIT)
17312 #if TARGET_MACHO
17313 /* dynamic-no-pic */
17314 if (MACHOPIC_INDIRECT)
17316 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
17317 ? op0 : gen_reg_rtx (Pmode);
17318 op1 = machopic_indirect_data_reference (op1, temp);
17319 if (MACHOPIC_PURE)
17320 op1 = machopic_legitimize_pic_address (op1, mode,
17321 temp == op1 ? 0 : temp);
17323 if (op0 != op1 && GET_CODE (op0) != MEM)
17325 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
17326 emit_insn (insn);
17327 return;
17329 if (GET_CODE (op0) == MEM)
17330 op1 = force_reg (Pmode, op1);
17331 else
17333 rtx temp = op0;
17334 if (GET_CODE (temp) != REG)
17335 temp = gen_reg_rtx (Pmode);
17336 temp = legitimize_pic_address (op1, temp);
17337 if (temp == op0)
17338 return;
17339 op1 = temp;
17341 /* dynamic-no-pic */
17342 #endif
17344 else
17346 if (MEM_P (op0))
17347 op1 = force_reg (mode, op1);
17348 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17350 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17351 op1 = legitimize_pic_address (op1, reg);
17352 if (op0 == op1)
17353 return;
17354 op1 = convert_to_mode (mode, op1, 1);
17358 else
17360 if (MEM_P (op0)
17361 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17362 || !push_operand (op0, mode))
17363 && MEM_P (op1))
17364 op1 = force_reg (mode, op1);
17366 if (push_operand (op0, mode)
17367 && ! general_no_elim_operand (op1, mode))
17368 op1 = copy_to_mode_reg (mode, op1);
17370 /* Force large constants in 64bit compilation into register
17371 to get them CSEed. */
17372 if (can_create_pseudo_p ()
17373 && (mode == DImode) && TARGET_64BIT
17374 && immediate_operand (op1, mode)
17375 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17376 && !register_operand (op0, mode)
17377 && optimize)
17378 op1 = copy_to_mode_reg (mode, op1);
17380 if (can_create_pseudo_p ()
17381 && CONST_DOUBLE_P (op1))
17383 /* If we are loading a floating point constant to a register,
17384 force the value to memory now, since we'll get better code
17385 out the back end. */
17387 op1 = validize_mem (force_const_mem (mode, op1));
17388 if (!register_operand (op0, mode))
17390 rtx temp = gen_reg_rtx (mode);
17391 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
17392 emit_move_insn (op0, temp);
17393 return;
17398 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17401 void
17402 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17404 rtx op0 = operands[0], op1 = operands[1];
17405 unsigned int align = GET_MODE_ALIGNMENT (mode);
17407 if (push_operand (op0, VOIDmode))
17408 op0 = emit_move_resolve_push (mode, op0);
17410 /* Force constants other than zero into memory. We do not know how
17411 the instructions used to build constants modify the upper 64 bits
17412 of the register; once we have that information we may be able
17413 to handle some of them more efficiently. */
17414 if (can_create_pseudo_p ()
17415 && register_operand (op0, mode)
17416 && (CONSTANT_P (op1)
17417 || (GET_CODE (op1) == SUBREG
17418 && CONSTANT_P (SUBREG_REG (op1))))
17419 && !standard_sse_constant_p (op1))
17420 op1 = validize_mem (force_const_mem (mode, op1));
17422 /* We need to check memory alignment for SSE mode since attribute
17423 can make operands unaligned. */
17424 if (can_create_pseudo_p ()
17425 && SSE_REG_MODE_P (mode)
17426 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17427 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17429 rtx tmp[2];
17431 /* ix86_expand_vector_move_misalign() does not like constants ... */
17432 if (CONSTANT_P (op1)
17433 || (GET_CODE (op1) == SUBREG
17434 && CONSTANT_P (SUBREG_REG (op1))))
17435 op1 = validize_mem (force_const_mem (mode, op1));
17437 /* ... nor both arguments in memory. */
17438 if (!register_operand (op0, mode)
17439 && !register_operand (op1, mode))
17440 op1 = force_reg (mode, op1);
17442 tmp[0] = op0; tmp[1] = op1;
17443 ix86_expand_vector_move_misalign (mode, tmp);
17444 return;
17447 /* Make operand1 a register if it isn't already. */
17448 if (can_create_pseudo_p ()
17449 && !register_operand (op0, mode)
17450 && !register_operand (op1, mode))
17452 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17453 return;
17456 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17459 /* Split 32-byte AVX unaligned load and store if needed. */
17461 static void
17462 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17464 rtx m;
17465 rtx (*extract) (rtx, rtx, rtx);
17466 rtx (*load_unaligned) (rtx, rtx);
17467 rtx (*store_unaligned) (rtx, rtx);
17468 machine_mode mode;
17470 switch (GET_MODE (op0))
17472 default:
17473 gcc_unreachable ();
17474 case V32QImode:
17475 extract = gen_avx_vextractf128v32qi;
17476 load_unaligned = gen_avx_loaddquv32qi;
17477 store_unaligned = gen_avx_storedquv32qi;
17478 mode = V16QImode;
17479 break;
17480 case V8SFmode:
17481 extract = gen_avx_vextractf128v8sf;
17482 load_unaligned = gen_avx_loadups256;
17483 store_unaligned = gen_avx_storeups256;
17484 mode = V4SFmode;
17485 break;
17486 case V4DFmode:
17487 extract = gen_avx_vextractf128v4df;
17488 load_unaligned = gen_avx_loadupd256;
17489 store_unaligned = gen_avx_storeupd256;
17490 mode = V2DFmode;
17491 break;
17494 if (MEM_P (op1))
17496 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
17497 && optimize_insn_for_speed_p ())
17499 rtx r = gen_reg_rtx (mode);
17500 m = adjust_address (op1, mode, 0);
17501 emit_move_insn (r, m);
17502 m = adjust_address (op1, mode, 16);
17503 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17504 emit_move_insn (op0, r);
17506 /* Normal *mov<mode>_internal pattern will handle
17507 unaligned loads just fine if misaligned_operand
17508 is true, and without the UNSPEC it can be combined
17509 with arithmetic instructions. */
17510 else if (misaligned_operand (op1, GET_MODE (op1)))
17511 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17512 else
17513 emit_insn (load_unaligned (op0, op1));
17515 else if (MEM_P (op0))
17517 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
17518 && optimize_insn_for_speed_p ())
17520 m = adjust_address (op0, mode, 0);
17521 emit_insn (extract (m, op1, const0_rtx));
17522 m = adjust_address (op0, mode, 16);
17523 emit_insn (extract (m, op1, const1_rtx));
17525 else
17526 emit_insn (store_unaligned (op0, op1));
17528 else
17529 gcc_unreachable ();
17532 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17533 straight to ix86_expand_vector_move. */
17534 /* Code generation for scalar reg-reg moves of single and double precision data:
17535 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17536 movaps reg, reg
17537 else
17538 movss reg, reg
17539 if (x86_sse_partial_reg_dependency == true)
17540 movapd reg, reg
17541 else
17542 movsd reg, reg
17544 Code generation for scalar loads of double precision data:
17545 if (x86_sse_split_regs == true)
17546 movlpd mem, reg (gas syntax)
17547 else
17548 movsd mem, reg
17550 Code generation for unaligned packed loads of single precision data
17551 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17552 if (x86_sse_unaligned_move_optimal)
17553 movups mem, reg
17555 if (x86_sse_partial_reg_dependency == true)
17557 xorps reg, reg
17558 movlps mem, reg
17559 movhps mem+8, reg
17561 else
17563 movlps mem, reg
17564 movhps mem+8, reg
17567 Code generation for unaligned packed loads of double precision data
17568 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17569 if (x86_sse_unaligned_move_optimal)
17570 movupd mem, reg
17572 if (x86_sse_split_regs == true)
17574 movlpd mem, reg
17575 movhpd mem+8, reg
17577 else
17579 movsd mem, reg
17580 movhpd mem+8, reg
17584 void
17585 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17587 rtx op0, op1, orig_op0 = NULL_RTX, m;
17588 rtx (*load_unaligned) (rtx, rtx);
17589 rtx (*store_unaligned) (rtx, rtx);
17591 op0 = operands[0];
17592 op1 = operands[1];
17594 if (GET_MODE_SIZE (mode) == 64)
17596 switch (GET_MODE_CLASS (mode))
17598 case MODE_VECTOR_INT:
17599 case MODE_INT:
17600 if (GET_MODE (op0) != V16SImode)
17602 if (!MEM_P (op0))
17604 orig_op0 = op0;
17605 op0 = gen_reg_rtx (V16SImode);
17607 else
17608 op0 = gen_lowpart (V16SImode, op0);
17610 op1 = gen_lowpart (V16SImode, op1);
17611 /* FALLTHRU */
17613 case MODE_VECTOR_FLOAT:
17614 switch (GET_MODE (op0))
17616 default:
17617 gcc_unreachable ();
17618 case V16SImode:
17619 load_unaligned = gen_avx512f_loaddquv16si;
17620 store_unaligned = gen_avx512f_storedquv16si;
17621 break;
17622 case V16SFmode:
17623 load_unaligned = gen_avx512f_loadups512;
17624 store_unaligned = gen_avx512f_storeups512;
17625 break;
17626 case V8DFmode:
17627 load_unaligned = gen_avx512f_loadupd512;
17628 store_unaligned = gen_avx512f_storeupd512;
17629 break;
17632 if (MEM_P (op1))
17633 emit_insn (load_unaligned (op0, op1));
17634 else if (MEM_P (op0))
17635 emit_insn (store_unaligned (op0, op1));
17636 else
17637 gcc_unreachable ();
17638 if (orig_op0)
17639 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17640 break;
17642 default:
17643 gcc_unreachable ();
17646 return;
17649 if (TARGET_AVX
17650 && GET_MODE_SIZE (mode) == 32)
17652 switch (GET_MODE_CLASS (mode))
17654 case MODE_VECTOR_INT:
17655 case MODE_INT:
17656 if (GET_MODE (op0) != V32QImode)
17658 if (!MEM_P (op0))
17660 orig_op0 = op0;
17661 op0 = gen_reg_rtx (V32QImode);
17663 else
17664 op0 = gen_lowpart (V32QImode, op0);
17666 op1 = gen_lowpart (V32QImode, op1);
17667 /* FALLTHRU */
17669 case MODE_VECTOR_FLOAT:
17670 ix86_avx256_split_vector_move_misalign (op0, op1);
17671 if (orig_op0)
17672 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17673 break;
17675 default:
17676 gcc_unreachable ();
17679 return;
17682 if (MEM_P (op1))
17684 /* Normal *mov<mode>_internal pattern will handle
17685 unaligned loads just fine if misaligned_operand
17686 is true, and without the UNSPEC it can be combined
17687 with arithmetic instructions. */
17688 if (TARGET_AVX
17689 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17690 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17691 && misaligned_operand (op1, GET_MODE (op1)))
17692 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17693 /* ??? If we have typed data, then it would appear that using
17694 movdqu is the only way to get unaligned data loaded with
17695 integer type. */
17696 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17698 if (GET_MODE (op0) != V16QImode)
17700 orig_op0 = op0;
17701 op0 = gen_reg_rtx (V16QImode);
17703 op1 = gen_lowpart (V16QImode, op1);
17704 /* We will eventually emit movups based on insn attributes. */
17705 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17706 if (orig_op0)
17707 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17709 else if (TARGET_SSE2 && mode == V2DFmode)
17711 rtx zero;
17713 if (TARGET_AVX
17714 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17715 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17716 || optimize_insn_for_size_p ())
17718 /* We will eventually emit movups based on insn attributes. */
17719 emit_insn (gen_sse2_loadupd (op0, op1));
17720 return;
17723 /* When SSE registers are split into halves, we can avoid
17724 writing to the top half twice. */
17725 if (TARGET_SSE_SPLIT_REGS)
17727 emit_clobber (op0);
17728 zero = op0;
17730 else
17732 /* ??? Not sure about the best option for the Intel chips.
17733 The following would seem to satisfy; the register is
17734 entirely cleared, breaking the dependency chain. We
17735 then store to the upper half, with a dependency depth
17736 of one. A rumor has it that Intel recommends two movsd
17737 followed by an unpacklpd, but this is unconfirmed. And
17738 given that the dependency depth of the unpacklpd would
17739 still be one, I'm not sure why this would be better. */
17740 zero = CONST0_RTX (V2DFmode);
17743 m = adjust_address (op1, DFmode, 0);
17744 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17745 m = adjust_address (op1, DFmode, 8);
17746 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17748 else
17750 rtx t;
17752 if (TARGET_AVX
17753 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17754 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17755 || optimize_insn_for_size_p ())
17757 if (GET_MODE (op0) != V4SFmode)
17759 orig_op0 = op0;
17760 op0 = gen_reg_rtx (V4SFmode);
17762 op1 = gen_lowpart (V4SFmode, op1);
17763 emit_insn (gen_sse_loadups (op0, op1));
17764 if (orig_op0)
17765 emit_move_insn (orig_op0,
17766 gen_lowpart (GET_MODE (orig_op0), op0));
17767 return;
17770 if (mode != V4SFmode)
17771 t = gen_reg_rtx (V4SFmode);
17772 else
17773 t = op0;
17775 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17776 emit_move_insn (t, CONST0_RTX (V4SFmode));
17777 else
17778 emit_clobber (t);
17780 m = adjust_address (op1, V2SFmode, 0);
17781 emit_insn (gen_sse_loadlps (t, t, m));
17782 m = adjust_address (op1, V2SFmode, 8);
17783 emit_insn (gen_sse_loadhps (t, t, m));
17784 if (mode != V4SFmode)
17785 emit_move_insn (op0, gen_lowpart (mode, t));
17788 else if (MEM_P (op0))
17790 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17792 op0 = gen_lowpart (V16QImode, op0);
17793 op1 = gen_lowpart (V16QImode, op1);
17794 /* We will eventually emit movups based on insn attributes. */
17795 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17797 else if (TARGET_SSE2 && mode == V2DFmode)
17799 if (TARGET_AVX
17800 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17801 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17802 || optimize_insn_for_size_p ())
17803 /* We will eventually emit movups based on insn attributes. */
17804 emit_insn (gen_sse2_storeupd (op0, op1));
17805 else
17807 m = adjust_address (op0, DFmode, 0);
17808 emit_insn (gen_sse2_storelpd (m, op1));
17809 m = adjust_address (op0, DFmode, 8);
17810 emit_insn (gen_sse2_storehpd (m, op1));
17813 else
17815 if (mode != V4SFmode)
17816 op1 = gen_lowpart (V4SFmode, op1);
17818 if (TARGET_AVX
17819 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17820 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17821 || optimize_insn_for_size_p ())
17823 op0 = gen_lowpart (V4SFmode, op0);
17824 emit_insn (gen_sse_storeups (op0, op1));
17826 else
17828 m = adjust_address (op0, V2SFmode, 0);
17829 emit_insn (gen_sse_storelps (m, op1));
17830 m = adjust_address (op0, V2SFmode, 8);
17831 emit_insn (gen_sse_storehps (m, op1));
17835 else
17836 gcc_unreachable ();
17839 /* Helper function of ix86_fixup_binary_operands to canonicalize
17840 operand order. Returns true if the operands should be swapped. */
17842 static bool
17843 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17844 rtx operands[])
17846 rtx dst = operands[0];
17847 rtx src1 = operands[1];
17848 rtx src2 = operands[2];
17850 /* If the operation is not commutative, we can't do anything. */
17851 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17852 return false;
17854 /* Highest priority is that src1 should match dst. */
17855 if (rtx_equal_p (dst, src1))
17856 return false;
17857 if (rtx_equal_p (dst, src2))
17858 return true;
17860 /* Next highest priority is that immediate constants come second. */
17861 if (immediate_operand (src2, mode))
17862 return false;
17863 if (immediate_operand (src1, mode))
17864 return true;
17866 /* Lowest priority is that memory references should come second. */
17867 if (MEM_P (src2))
17868 return false;
17869 if (MEM_P (src1))
17870 return true;
17872 return false;
17876 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17877 destination to use for the operation. If different from the true
17878 destination in operands[0], a copy operation will be required. */
17881 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
17882 rtx operands[])
17884 rtx dst = operands[0];
17885 rtx src1 = operands[1];
17886 rtx src2 = operands[2];
17888 /* Canonicalize operand order. */
17889 if (ix86_swap_binary_operands_p (code, mode, operands))
17891 /* It is invalid to swap operands of different modes. */
17892 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17894 std::swap (src1, src2);
17897 /* Both source operands cannot be in memory. */
17898 if (MEM_P (src1) && MEM_P (src2))
17900 /* Optimization: Only read from memory once. */
17901 if (rtx_equal_p (src1, src2))
17903 src2 = force_reg (mode, src2);
17904 src1 = src2;
17906 else if (rtx_equal_p (dst, src1))
17907 src2 = force_reg (mode, src2);
17908 else
17909 src1 = force_reg (mode, src1);
17912 /* If the destination is memory, and we do not have matching source
17913 operands, do things in registers. */
17914 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17915 dst = gen_reg_rtx (mode);
17917 /* Source 1 cannot be a constant. */
17918 if (CONSTANT_P (src1))
17919 src1 = force_reg (mode, src1);
17921 /* Source 1 cannot be a non-matching memory. */
17922 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17923 src1 = force_reg (mode, src1);
17925 /* Improve address combine. */
17926 if (code == PLUS
17927 && GET_MODE_CLASS (mode) == MODE_INT
17928 && MEM_P (src2))
17929 src2 = force_reg (mode, src2);
17931 operands[1] = src1;
17932 operands[2] = src2;
17933 return dst;
17936 /* Similarly, but assume that the destination has already been
17937 set up properly. */
17939 void
17940 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17941 machine_mode mode, rtx operands[])
17943 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17944 gcc_assert (dst == operands[0]);
17947 /* Attempt to expand a binary operator. Make the expansion closer to the
17948 actual machine, then just general_operand, which will allow 3 separate
17949 memory references (one output, two input) in a single insn. */
17951 void
17952 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
17953 rtx operands[])
17955 rtx src1, src2, dst, op, clob;
17957 dst = ix86_fixup_binary_operands (code, mode, operands);
17958 src1 = operands[1];
17959 src2 = operands[2];
17961 /* Emit the instruction. */
17963 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17965 if (reload_completed
17966 && code == PLUS
17967 && !rtx_equal_p (dst, src1))
17969 /* This is going to be an LEA; avoid splitting it later. */
17970 emit_insn (op);
17972 else
17974 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17975 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17978 /* Fix up the destination if needed. */
17979 if (dst != operands[0])
17980 emit_move_insn (operands[0], dst);
17983 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17984 the given OPERANDS. */
17986 void
17987 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
17988 rtx operands[])
17990 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17991 if (GET_CODE (operands[1]) == SUBREG)
17993 op1 = operands[1];
17994 op2 = operands[2];
17996 else if (GET_CODE (operands[2]) == SUBREG)
17998 op1 = operands[2];
17999 op2 = operands[1];
18001 /* Optimize (__m128i) d | (__m128i) e and similar code
18002 when d and e are float vectors into a float vector logical
18003 insn. In C/C++, without using intrinsics, there is no other way
18004 to express a vector logical operation on float vectors than
18005 to cast them temporarily to integer vectors. */
18006 if (op1
18007 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18008 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
18009 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
18010 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
18011 && SUBREG_BYTE (op1) == 0
18012 && (GET_CODE (op2) == CONST_VECTOR
18013 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
18014 && SUBREG_BYTE (op2) == 0))
18015 && can_create_pseudo_p ())
18017 rtx dst;
18018 switch (GET_MODE (SUBREG_REG (op1)))
18020 case V4SFmode:
18021 case V8SFmode:
18022 case V16SFmode:
18023 case V2DFmode:
18024 case V4DFmode:
18025 case V8DFmode:
18026 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
18027 if (GET_CODE (op2) == CONST_VECTOR)
18029 op2 = gen_lowpart (GET_MODE (dst), op2);
18030 op2 = force_reg (GET_MODE (dst), op2);
18032 else
18034 op1 = operands[1];
18035 op2 = SUBREG_REG (operands[2]);
18036 if (!nonimmediate_operand (op2, GET_MODE (dst)))
18037 op2 = force_reg (GET_MODE (dst), op2);
18039 op1 = SUBREG_REG (op1);
18040 if (!nonimmediate_operand (op1, GET_MODE (dst)))
18041 op1 = force_reg (GET_MODE (dst), op1);
18042 emit_insn (gen_rtx_SET (VOIDmode, dst,
18043 gen_rtx_fmt_ee (code, GET_MODE (dst),
18044 op1, op2)));
18045 emit_move_insn (operands[0], gen_lowpart (mode, dst));
18046 return;
18047 default:
18048 break;
18051 if (!nonimmediate_operand (operands[1], mode))
18052 operands[1] = force_reg (mode, operands[1]);
18053 if (!nonimmediate_operand (operands[2], mode))
18054 operands[2] = force_reg (mode, operands[2]);
18055 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18056 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18057 gen_rtx_fmt_ee (code, mode, operands[1],
18058 operands[2])));
18061 /* Return TRUE or FALSE depending on whether the binary operator meets the
18062 appropriate constraints. */
18064 bool
18065 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18066 rtx operands[3])
18068 rtx dst = operands[0];
18069 rtx src1 = operands[1];
18070 rtx src2 = operands[2];
18072 /* Both source operands cannot be in memory. */
18073 if (MEM_P (src1) && MEM_P (src2))
18074 return false;
18076 /* Canonicalize operand order for commutative operators. */
18077 if (ix86_swap_binary_operands_p (code, mode, operands))
18078 std::swap (src1, src2);
18080 /* If the destination is memory, we must have a matching source operand. */
18081 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18082 return false;
18084 /* Source 1 cannot be a constant. */
18085 if (CONSTANT_P (src1))
18086 return false;
18088 /* Source 1 cannot be a non-matching memory. */
18089 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18090 /* Support "andhi/andsi/anddi" as a zero-extending move. */
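/* (Constraint L is expected to match the zero-extending AND masks,
   e.g. 0xff and 0xffff.) */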
18091 return (code == AND
18092 && (mode == HImode
18093 || mode == SImode
18094 || (TARGET_64BIT && mode == DImode))
18095 && satisfies_constraint_L (src2));
18097 return true;
18100 /* Attempt to expand a unary operator. Make the expansion closer to the
18101 actual machine, then just general_operand, which will allow 2 separate
18102 memory references (one output, one input) in a single insn. */
18104 void
18105 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18106 rtx operands[])
18108 bool matching_memory = false;
18109 rtx src, dst, op, clob;
18111 dst = operands[0];
18112 src = operands[1];
18114 /* If the destination is memory, and we do not have matching source
18115 operands, do things in registers. */
18116 if (MEM_P (dst))
18118 if (rtx_equal_p (dst, src))
18119 matching_memory = true;
18120 else
18121 dst = gen_reg_rtx (mode);
18124 /* When source operand is memory, destination must match. */
18125 if (MEM_P (src) && !matching_memory)
18126 src = force_reg (mode, src);
18128 /* Emit the instruction. */
18130 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
18132 if (code == NOT)
18133 emit_insn (op);
18134 else
18136 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18137 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18140 /* Fix up the destination if needed. */
18141 if (dst != operands[0])
18142 emit_move_insn (operands[0], dst);
18145 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18146 divisor are within the range [0-255]. */
18148 void
18149 ix86_split_idivmod (machine_mode mode, rtx operands[],
18150 bool signed_p)
18152 rtx_code_label *end_label, *qimode_label;
18153 rtx insn, div, mod;
18154 rtx scratch, tmp0, tmp1, tmp2;
18155 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18156 rtx (*gen_zero_extend) (rtx, rtx);
18157 rtx (*gen_test_ccno_1) (rtx, rtx);
18159 switch (mode)
18161 case SImode:
18162 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18163 gen_test_ccno_1 = gen_testsi_ccno_1;
18164 gen_zero_extend = gen_zero_extendqisi2;
18165 break;
18166 case DImode:
18167 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18168 gen_test_ccno_1 = gen_testdi_ccno_1;
18169 gen_zero_extend = gen_zero_extendqidi2;
18170 break;
18171 default:
18172 gcc_unreachable ();
18175 end_label = gen_label_rtx ();
18176 qimode_label = gen_label_rtx ();
18178 scratch = gen_reg_rtx (mode);
18180 /* Use 8bit unsigned divmod if dividend and divisor are within
18181 the range [0-255]. */
18182 emit_move_insn (scratch, operands[2]);
18183 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18184 scratch, 1, OPTAB_DIRECT);
18185 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
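/* -0x100 is ~0xff, so the test sets ZF only when neither value has
   bits above the low 8 set; in that case we branch to the 8-bit
   divide below. */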
18186 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18187 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18188 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18189 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18190 pc_rtx);
18191 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
18192 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18193 JUMP_LABEL (insn) = qimode_label;
18195 /* Generate original signed/unsigned divmod. */
18196 div = gen_divmod4_1 (operands[0], operands[1],
18197 operands[2], operands[3]);
18198 emit_insn (div);
18200 /* Branch to the end. */
18201 emit_jump_insn (gen_jump (end_label));
18202 emit_barrier ();
18204 /* Generate 8bit unsigned divide. */
18205 emit_label (qimode_label);
18206 /* Don't use operands[0] for result of 8bit divide since not all
18207 registers support QImode ZERO_EXTRACT. */
18208 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18209 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18210 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18211 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18213 if (signed_p)
18215 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18216 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18218 else
18220 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18221 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18224 /* Extract remainder from AH. */
18225 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18226 if (REG_P (operands[1]))
18227 insn = emit_move_insn (operands[1], tmp1);
18228 else
18230 /* Need a new scratch register since the old one has result
18231 of 8bit divide. */
18232 scratch = gen_reg_rtx (mode);
18233 emit_move_insn (scratch, tmp1);
18234 insn = emit_move_insn (operands[1], scratch);
18236 set_unique_reg_note (insn, REG_EQUAL, mod);
18238 /* Zero extend quotient from AL. */
18239 tmp1 = gen_lowpart (QImode, tmp0);
18240 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18241 set_unique_reg_note (insn, REG_EQUAL, div);
18243 emit_label (end_label);
18246 #define LEA_MAX_STALL (3)
18247 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
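/* Distances below are accumulated in half-cycles (see increase_distance),
   so the search window is presumably LEA_MAX_STALL expressed in
   half-cycles. */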
18249 /* Increase given DISTANCE in half-cycles according to
18250 dependencies between PREV and NEXT instructions.
18251 Add 1 half-cycle if there is no dependency and
18252 go to the next cycle if there is some dependency. */
18254 static unsigned int
18255 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18257 df_ref def, use;
18259 if (!prev || !next)
18260 return distance + (distance & 1) + 2;
18262 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18263 return distance + 1;
18265 FOR_EACH_INSN_USE (use, next)
18266 FOR_EACH_INSN_DEF (def, prev)
18267 if (!DF_REF_IS_ARTIFICIAL (def)
18268 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18269 return distance + (distance & 1) + 2;
18271 return distance + 1;
18274 /* Function checks if instruction INSN defines register number
18275 REGNO1 or REGNO2. */
18277 static bool
18278 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18279 rtx_insn *insn)
18281 df_ref def;
18283 FOR_EACH_INSN_DEF (def, insn)
18284 if (DF_REF_REG_DEF_P (def)
18285 && !DF_REF_IS_ARTIFICIAL (def)
18286 && (regno1 == DF_REF_REGNO (def)
18287 || regno2 == DF_REF_REGNO (def)))
18288 return true;
18290 return false;
18293 /* Function checks if instruction INSN uses register number
18294 REGNO as a part of address expression. */
18296 static bool
18297 insn_uses_reg_mem (unsigned int regno, rtx insn)
18299 df_ref use;
18301 FOR_EACH_INSN_USE (use, insn)
18302 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18303 return true;
18305 return false;
18308 /* Search backward for non-agu definition of register number REGNO1
18309 or register number REGNO2 in basic block starting from instruction
18310 START up to head of basic block or instruction INSN.
18312 The function puts true into *FOUND if a definition was found
18313 and false otherwise.
18315 Distance in half-cycles between START and found instruction or head
18316 of BB is added to DISTANCE and returned. */
18318 static int
18319 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18320 rtx_insn *insn, int distance,
18321 rtx_insn *start, bool *found)
18323 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18324 rtx_insn *prev = start;
18325 rtx_insn *next = NULL;
18327 *found = false;
18329 while (prev
18330 && prev != insn
18331 && distance < LEA_SEARCH_THRESHOLD)
18333 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18335 distance = increase_distance (prev, next, distance);
18336 if (insn_defines_reg (regno1, regno2, prev))
18338 if (recog_memoized (prev) < 0
18339 || get_attr_type (prev) != TYPE_LEA)
18341 *found = true;
18342 return distance;
18346 next = prev;
18348 if (prev == BB_HEAD (bb))
18349 break;
18351 prev = PREV_INSN (prev);
18354 return distance;
18357 /* Search backward for non-agu definition of register number REGNO1
18358 or register number REGNO2 in INSN's basic block until
18359 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18360 2. Reach neighbour BBs boundary, or
18361 3. Reach agu definition.
18362 Returns the distance between the non-agu definition point and INSN.
18363 If no definition point, returns -1. */
18365 static int
18366 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18367 rtx_insn *insn)
18369 basic_block bb = BLOCK_FOR_INSN (insn);
18370 int distance = 0;
18371 bool found = false;
18373 if (insn != BB_HEAD (bb))
18374 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18375 distance, PREV_INSN (insn),
18376 &found);
18378 if (!found && distance < LEA_SEARCH_THRESHOLD)
18380 edge e;
18381 edge_iterator ei;
18382 bool simple_loop = false;
18384 FOR_EACH_EDGE (e, ei, bb->preds)
18385 if (e->src == bb)
18387 simple_loop = true;
18388 break;
18391 if (simple_loop)
18392 distance = distance_non_agu_define_in_bb (regno1, regno2,
18393 insn, distance,
18394 BB_END (bb), &found);
18395 else
18397 int shortest_dist = -1;
18398 bool found_in_bb = false;
18400 FOR_EACH_EDGE (e, ei, bb->preds)
18402 int bb_dist
18403 = distance_non_agu_define_in_bb (regno1, regno2,
18404 insn, distance,
18405 BB_END (e->src),
18406 &found_in_bb);
18407 if (found_in_bb)
18409 if (shortest_dist < 0)
18410 shortest_dist = bb_dist;
18411 else if (bb_dist > 0)
18412 shortest_dist = MIN (bb_dist, shortest_dist);
18414 found = true;
18418 distance = shortest_dist;
18422 /* get_attr_type may modify recog data. We want to make sure
18423 that recog data is valid for instruction INSN, on which
18424 distance_non_agu_define is called. INSN is unchanged here. */
18425 extract_insn_cached (insn);
18427 if (!found)
18428 return -1;
18430 return distance >> 1;
18433 /* Return the distance in half-cycles between INSN and the next
18434 insn that uses register number REGNO in a memory address, added
18435 to DISTANCE. Return -1 if REGNO is set.
18437 Put true into *FOUND if a register usage was found and
18438 false otherwise.
18439 Put true into *REDEFINED if a register redefinition was
18440 found and false otherwise. */
18442 static int
18443 distance_agu_use_in_bb (unsigned int regno,
18444 rtx_insn *insn, int distance, rtx_insn *start,
18445 bool *found, bool *redefined)
18447 basic_block bb = NULL;
18448 rtx_insn *next = start;
18449 rtx_insn *prev = NULL;
18451 *found = false;
18452 *redefined = false;
18454 if (start != NULL_RTX)
18456 bb = BLOCK_FOR_INSN (start);
18457 if (start != BB_HEAD (bb))
18458 /* If insn and start belong to the same bb, set prev to insn,
18459 so the call to increase_distance will increase the distance
18460 between insns by 1. */
18461 prev = insn;
18464 while (next
18465 && next != insn
18466 && distance < LEA_SEARCH_THRESHOLD)
18468 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18470 distance = increase_distance(prev, next, distance);
18471 if (insn_uses_reg_mem (regno, next))
18473 /* Return DISTANCE if OP0 is used in memory
18474 address in NEXT. */
18475 *found = true;
18476 return distance;
18479 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18481 /* Return -1 if OP0 is set in NEXT. */
18482 *redefined = true;
18483 return -1;
18486 prev = next;
18489 if (next == BB_END (bb))
18490 break;
18492 next = NEXT_INSN (next);
18495 return distance;
18498 /* Return the distance between INSN and the next insn that uses
18499 register number REGNO0 in a memory address. Return -1 if no such
18500 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18502 static int
18503 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18505 basic_block bb = BLOCK_FOR_INSN (insn);
18506 int distance = 0;
18507 bool found = false;
18508 bool redefined = false;
18510 if (insn != BB_END (bb))
18511 distance = distance_agu_use_in_bb (regno0, insn, distance,
18512 NEXT_INSN (insn),
18513 &found, &redefined);
18515 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18517 edge e;
18518 edge_iterator ei;
18519 bool simple_loop = false;
18521 FOR_EACH_EDGE (e, ei, bb->succs)
18522 if (e->dest == bb)
18524 simple_loop = true;
18525 break;
18528 if (simple_loop)
18529 distance = distance_agu_use_in_bb (regno0, insn,
18530 distance, BB_HEAD (bb),
18531 &found, &redefined);
18532 else
18534 int shortest_dist = -1;
18535 bool found_in_bb = false;
18536 bool redefined_in_bb = false;
18538 FOR_EACH_EDGE (e, ei, bb->succs)
18540 int bb_dist
18541 = distance_agu_use_in_bb (regno0, insn,
18542 distance, BB_HEAD (e->dest),
18543 &found_in_bb, &redefined_in_bb);
18544 if (found_in_bb)
18546 if (shortest_dist < 0)
18547 shortest_dist = bb_dist;
18548 else if (bb_dist > 0)
18549 shortest_dist = MIN (bb_dist, shortest_dist);
18551 found = true;
18555 distance = shortest_dist;
18559 if (!found || redefined)
18560 return -1;
18562 return distance >> 1;
18565 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18566 there is a dilemma of choosing LEA or ADD.
18567 Negative value: ADD is preferred over LEA
18568 Zero: Neutral
18569 Positive value: LEA is preferred over ADD. */
18570 #define IX86_LEA_PRIORITY 0
18572 /* Return true if using the lea INSN has a performance advantage
18573 over a sequence of instructions. The instruction sequence has
18574 SPLIT_COST cycles higher latency than the lea latency. */
18576 static bool
18577 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18578 unsigned int regno2, int split_cost, bool has_scale)
18580 int dist_define, dist_use;
18582 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18583 non-destructive destination, or because the ability to use
18584 SCALE is wanted, the use of LEA is justified. */
18585 if (TARGET_SILVERMONT || TARGET_INTEL)
18587 if (has_scale)
18588 return true;
18589 if (split_cost < 1)
18590 return false;
18591 if (regno0 == regno1 || regno0 == regno2)
18592 return false;
18593 return true;
18596 dist_define = distance_non_agu_define (regno1, regno2, insn);
18597 dist_use = distance_agu_use (regno0, insn);
18599 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18601 /* If there is no non-AGU operand definition, no AGU
18602 operand usage and the split cost is 0, then both the lea
18603 and non-lea variants have the same priority. Currently
18604 we prefer lea for 64-bit code and non-lea on 32-bit
18605 code. */
18606 if (dist_use < 0 && split_cost == 0)
18607 return TARGET_64BIT || IX86_LEA_PRIORITY;
18608 else
18609 return true;
18612 /* The longer the definition distance, the more preferable lea is.
18613 Here we adjust it to take into account the splitting cost and
18614 lea priority. */
18615 dist_define += split_cost + IX86_LEA_PRIORITY;
18617 /* If there is no use in a memory address then we just check
18618 that the split cost exceeds the AGU stall. */
18619 if (dist_use < 0)
18620 return dist_define > LEA_MAX_STALL;
18622 /* If this insn has both backward non-agu dependence and forward
18623 agu dependence, the one with short distance takes effect. */
18624 return dist_define >= dist_use;
18627 /* Return true if it is legal to clobber flags by INSN and
18628 false otherwise. */
18630 static bool
18631 ix86_ok_to_clobber_flags (rtx_insn *insn)
18633 basic_block bb = BLOCK_FOR_INSN (insn);
18634 df_ref use;
18635 bitmap live;
18637 while (insn)
18639 if (NONDEBUG_INSN_P (insn))
18641 FOR_EACH_INSN_USE (use, insn)
18642 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18643 return false;
18645 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18646 return true;
18649 if (insn == BB_END (bb))
18650 break;
18652 insn = NEXT_INSN (insn);
18655 live = df_get_live_out(bb);
18656 return !REGNO_REG_SET_P (live, FLAGS_REG);
18659 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18660 move and add to avoid AGU stalls. */
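/* Illustrative example (not from the original source): when this returns
   true, an add such as "lea (%rbx,%rcx), %rax" is better emitted as
   "mov %rbx, %rax; add %rcx, %rax", trading one extra ALU insn for
   avoiding the AGU stall.  */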
18662 bool
18663 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18665 unsigned int regno0, regno1, regno2;
18667 /* Check if we need to optimize. */
18668 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18669 return false;
18671 /* Check it is correct to split here. */
18672 if (!ix86_ok_to_clobber_flags(insn))
18673 return false;
18675 regno0 = true_regnum (operands[0]);
18676 regno1 = true_regnum (operands[1]);
18677 regno2 = true_regnum (operands[2]);
18679 /* We need to split only adds with a non-destructive
18680 destination operand. */
18681 if (regno0 == regno1 || regno0 == regno2)
18682 return false;
18683 else
18684 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18687 /* Return true if we should emit lea instruction instead of mov
18688 instruction. */
18690 bool
18691 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18693 unsigned int regno0, regno1;
18695 /* Check if we need to optimize. */
18696 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18697 return false;
18699 /* Use lea for reg to reg moves only. */
18700 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18701 return false;
18703 regno0 = true_regnum (operands[0]);
18704 regno1 = true_regnum (operands[1]);
18706 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18709 /* Return true if we need to split lea into a sequence of
18710 instructions to avoid AGU stalls. */
18712 bool
18713 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18715 unsigned int regno0, regno1, regno2;
18716 int split_cost;
18717 struct ix86_address parts;
18718 int ok;
18720 /* Check we need to optimize. */
18721 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18722 return false;
18724 /* The "at least two components" test below might not catch simple
18725 move or zero extension insns if parts.base is non-NULL and parts.disp
18726 is const0_rtx as the only components in the address, e.g. if the
18727 register is %rbp or %r13. As this test is much cheaper and moves or
18728 zero extensions are the common case, do this check first. */
18729 if (REG_P (operands[1])
18730 || (SImode_address_operand (operands[1], VOIDmode)
18731 && REG_P (XEXP (operands[1], 0))))
18732 return false;
18734 /* Check if it is OK to split here. */
18735 if (!ix86_ok_to_clobber_flags (insn))
18736 return false;
18738 ok = ix86_decompose_address (operands[1], &parts);
18739 gcc_assert (ok);
18741 /* There should be at least two components in the address. */
18742 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18743 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18744 return false;
18746 /* We should not split into add if a non-legitimate PIC
18747 operand is used as the displacement. */
18748 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18749 return false;
18751 regno0 = true_regnum (operands[0]);
18752 regno1 = INVALID_REGNUM;
18753 regno2 = INVALID_REGNUM;
18755 if (parts.base)
18756 regno1 = true_regnum (parts.base);
18757 if (parts.index)
18758 regno2 = true_regnum (parts.index);
18760 split_cost = 0;
18762 /* Compute how many cycles we will add to execution time
18763 if we split the lea into a sequence of instructions. */
18764 if (parts.base || parts.index)
18766 /* Have to use a mov instruction if the non-destructive
18767 destination form is used. */
18768 if (regno1 != regno0 && regno2 != regno0)
18769 split_cost += 1;
18771 /* Have to add index to base if both exist. */
18772 if (parts.base && parts.index)
18773 split_cost += 1;
18775 /* Have to use shift and adds if scale is 2 or greater. */
18776 if (parts.scale > 1)
18778 if (regno0 != regno1)
18779 split_cost += 1;
18780 else if (regno2 == regno0)
18781 split_cost += 4;
18782 else
18783 split_cost += parts.scale;
18786 /* Have to use an add instruction with an immediate if
18787 disp is nonzero. */
18788 if (parts.disp && parts.disp != const0_rtx)
18789 split_cost += 1;
18791 /* Subtract the price of lea. */
18792 split_cost -= 1;
18795 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18796 parts.scale > 1);
18799 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18800 matches destination. RTX includes clobber of FLAGS_REG. */
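/* For example, ix86_emit_binop (PLUS, SImode, dst, src) emits roughly
   (parallel [(set dst (plus:SI dst src))
              (clobber (reg:CC FLAGS_REG))]).  */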
18802 static void
18803 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18804 rtx dst, rtx src)
18806 rtx op, clob;
18808 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18809 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18811 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18814 /* Return true if REGNO1's definition is the nearest one to the insn. */
18816 static bool
18817 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18819 rtx_insn *prev = insn;
18820 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18822 if (insn == start)
18823 return false;
18824 while (prev && prev != start)
18826 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18828 prev = PREV_INSN (prev);
18829 continue;
18831 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18832 return true;
18833 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18834 return false;
18835 prev = PREV_INSN (prev);
18838 /* None of the regs is defined in the bb. */
18839 return false;
18842 /* Split lea instructions into a sequence of instructions
18843 which are executed on ALU to avoid AGU stalls.
18844 It is assumed that it is allowed to clobber flags register
18845 at lea position. */
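/* Illustrative example (assuming no register overlap):
   "lea 0x4(%rbx,%rcx,2), %rax" may be rewritten as
   "mov %rcx, %rax; shl $1, %rax; add %rbx, %rax; add $0x4, %rax";
   the exact sequence depends on how the destination overlaps the
   base and index registers.  */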
18847 void
18848 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18850 unsigned int regno0, regno1, regno2;
18851 struct ix86_address parts;
18852 rtx target, tmp;
18853 int ok, adds;
18855 ok = ix86_decompose_address (operands[1], &parts);
18856 gcc_assert (ok);
18858 target = gen_lowpart (mode, operands[0]);
18860 regno0 = true_regnum (target);
18861 regno1 = INVALID_REGNUM;
18862 regno2 = INVALID_REGNUM;
18864 if (parts.base)
18866 parts.base = gen_lowpart (mode, parts.base);
18867 regno1 = true_regnum (parts.base);
18870 if (parts.index)
18872 parts.index = gen_lowpart (mode, parts.index);
18873 regno2 = true_regnum (parts.index);
18876 if (parts.disp)
18877 parts.disp = gen_lowpart (mode, parts.disp);
18879 if (parts.scale > 1)
18881 /* Case r1 = r1 + ... */
18882 if (regno1 == regno0)
18884 /* If we have a case r1 = r1 + C * r2 then we
18885 would have to use multiplication, which is very
18886 expensive. Assume the cost model is wrong if we
18887 have such a case here. */
18888 gcc_assert (regno2 != regno0);
18890 for (adds = parts.scale; adds > 0; adds--)
18891 ix86_emit_binop (PLUS, mode, target, parts.index);
18893 else
18895 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18896 if (regno0 != regno2)
18897 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18899 /* Use shift for scaling. */
18900 ix86_emit_binop (ASHIFT, mode, target,
18901 GEN_INT (exact_log2 (parts.scale)));
18903 if (parts.base)
18904 ix86_emit_binop (PLUS, mode, target, parts.base);
18906 if (parts.disp && parts.disp != const0_rtx)
18907 ix86_emit_binop (PLUS, mode, target, parts.disp);
18910 else if (!parts.base && !parts.index)
18912 gcc_assert(parts.disp);
18913 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18915 else
18917 if (!parts.base)
18919 if (regno0 != regno2)
18920 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18922 else if (!parts.index)
18924 if (regno0 != regno1)
18925 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18927 else
18929 if (regno0 == regno1)
18930 tmp = parts.index;
18931 else if (regno0 == regno2)
18932 tmp = parts.base;
18933 else
18935 rtx tmp1;
18937 /* Find the better operand for the SET instruction, depending
18938 on which definition is farther from the insn. */
18939 if (find_nearest_reg_def (insn, regno1, regno2))
18940 tmp = parts.index, tmp1 = parts.base;
18941 else
18942 tmp = parts.base, tmp1 = parts.index;
18944 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18946 if (parts.disp && parts.disp != const0_rtx)
18947 ix86_emit_binop (PLUS, mode, target, parts.disp);
18949 ix86_emit_binop (PLUS, mode, target, tmp1);
18950 return;
18953 ix86_emit_binop (PLUS, mode, target, tmp);
18956 if (parts.disp && parts.disp != const0_rtx)
18957 ix86_emit_binop (PLUS, mode, target, parts.disp);
18961 /* Return true if it is ok to optimize an ADD operation to a LEA
18962 operation to avoid flag register consumption. For most processors,
18963 ADD is faster than LEA. For processors like BONNELL, if the
18964 destination register of the LEA holds an actual address which will be
18965 used soon, LEA is better; otherwise ADD is better. */
18967 bool
18968 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
18970 unsigned int regno0 = true_regnum (operands[0]);
18971 unsigned int regno1 = true_regnum (operands[1]);
18972 unsigned int regno2 = true_regnum (operands[2]);
18974 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18975 if (regno0 != regno1 && regno0 != regno2)
18976 return true;
18978 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18979 return false;
18981 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18984 /* Return true if destination reg of SET_BODY is shift count of
18985 USE_BODY. */
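/* For instance (illustrative only), if SET_BODY sets %ecx and USE_BODY is
   a shift or rotate whose count operand is %cl, both refer to the same
   hard register number and this returns true.  */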
18987 static bool
18988 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18990 rtx set_dest;
18991 rtx shift_rtx;
18992 int i;
18994 /* Retrieve destination of SET_BODY. */
18995 switch (GET_CODE (set_body))
18997 case SET:
18998 set_dest = SET_DEST (set_body);
18999 if (!set_dest || !REG_P (set_dest))
19000 return false;
19001 break;
19002 case PARALLEL:
19003 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
19004 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
19005 use_body))
19006 return true;
19007 default:
19008 return false;
19009 break;
19012 /* Retrieve shift count of USE_BODY. */
19013 switch (GET_CODE (use_body))
19015 case SET:
19016 shift_rtx = XEXP (use_body, 1);
19017 break;
19018 case PARALLEL:
19019 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
19020 if (ix86_dep_by_shift_count_body (set_body,
19021 XVECEXP (use_body, 0, i)))
19022 return true;
19023 default:
19024 return false;
19025 break;
19028 if (shift_rtx
19029 && (GET_CODE (shift_rtx) == ASHIFT
19030 || GET_CODE (shift_rtx) == LSHIFTRT
19031 || GET_CODE (shift_rtx) == ASHIFTRT
19032 || GET_CODE (shift_rtx) == ROTATE
19033 || GET_CODE (shift_rtx) == ROTATERT))
19035 rtx shift_count = XEXP (shift_rtx, 1);
19037 /* Return true if shift count is dest of SET_BODY. */
19038 if (REG_P (shift_count))
19040 /* Add this check since it can be invoked before register
19041 allocation in the pre-reload scheduler. */
19042 if (reload_completed
19043 && true_regnum (set_dest) == true_regnum (shift_count))
19044 return true;
19045 else if (REGNO(set_dest) == REGNO(shift_count))
19046 return true;
19050 return false;
19053 /* Return true if destination reg of SET_INSN is shift count of
19054 USE_INSN. */
19056 bool
19057 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19059 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19060 PATTERN (use_insn));
19063 /* Return TRUE or FALSE depending on whether the unary operator meets the
19064 appropriate constraints. */
19066 bool
19067 ix86_unary_operator_ok (enum rtx_code,
19068 machine_mode,
19069 rtx operands[2])
19071 /* If one of operands is memory, source and destination must match. */
19072 if ((MEM_P (operands[0])
19073 || MEM_P (operands[1]))
19074 && ! rtx_equal_p (operands[0], operands[1]))
19075 return false;
19076 return true;
19079 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19080 are ok, keeping in mind the possible movddup alternative. */
19082 bool
19083 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19085 if (MEM_P (operands[0]))
19086 return rtx_equal_p (operands[0], operands[1 + high]);
19087 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19088 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19089 return true;
19092 /* Post-reload splitter for converting an SF or DFmode value in an
19093 SSE register into an unsigned SImode. */
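/* Sketch of the idea (informal): lanes whose value is >= 0x1p31 have
   0x1p31 subtracted before the signed truncating conversion, and the
   corresponding 0x80000000 bit is xor-ed back into the integer result,
   so the full unsigned SImode range is covered.  */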
19095 void
19096 ix86_split_convert_uns_si_sse (rtx operands[])
19098 machine_mode vecmode;
19099 rtx value, large, zero_or_two31, input, two31, x;
19101 large = operands[1];
19102 zero_or_two31 = operands[2];
19103 input = operands[3];
19104 two31 = operands[4];
19105 vecmode = GET_MODE (large);
19106 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19108 /* Load up the value into the low element. We must ensure that the other
19109 elements are valid floats -- zero is the easiest such value. */
19110 if (MEM_P (input))
19112 if (vecmode == V4SFmode)
19113 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19114 else
19115 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19117 else
19119 input = gen_rtx_REG (vecmode, REGNO (input));
19120 emit_move_insn (value, CONST0_RTX (vecmode));
19121 if (vecmode == V4SFmode)
19122 emit_insn (gen_sse_movss (value, value, input));
19123 else
19124 emit_insn (gen_sse2_movsd (value, value, input));
19127 emit_move_insn (large, two31);
19128 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19130 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19131 emit_insn (gen_rtx_SET (VOIDmode, large, x));
19133 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19134 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
19136 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19137 emit_insn (gen_rtx_SET (VOIDmode, value, x));
19139 large = gen_rtx_REG (V4SImode, REGNO (large));
19140 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19142 x = gen_rtx_REG (V4SImode, REGNO (value));
19143 if (vecmode == V4SFmode)
19144 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19145 else
19146 emit_insn (gen_sse2_cvttpd2dq (x, value));
19147 value = x;
19149 emit_insn (gen_xorv4si3 (value, value, large));
19152 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19153 Expects the 64-bit DImode to be supplied in a pair of integral
19154 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19155 -mfpmath=sse, !optimize_size only. */
19157 void
19158 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19160 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19161 rtx int_xmm, fp_xmm;
19162 rtx biases, exponents;
19163 rtx x;
19165 int_xmm = gen_reg_rtx (V4SImode);
19166 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19167 emit_insn (gen_movdi_to_sse (int_xmm, input));
19168 else if (TARGET_SSE_SPLIT_REGS)
19170 emit_clobber (int_xmm);
19171 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19173 else
19175 x = gen_reg_rtx (V2DImode);
19176 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19177 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19180 x = gen_rtx_CONST_VECTOR (V4SImode,
19181 gen_rtvec (4, GEN_INT (0x43300000UL),
19182 GEN_INT (0x45300000UL),
19183 const0_rtx, const0_rtx));
19184 exponents = validize_mem (force_const_mem (V4SImode, x));
19186 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19187 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19189 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19190 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19191 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19192 (0x1.0p84 + double(fp_value_hi_xmm)).
19193 Note these exponents differ by 32. */
19195 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19197 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19198 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19199 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19200 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19201 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19202 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19203 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19204 biases = validize_mem (force_const_mem (V2DFmode, biases));
19205 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19207 /* Add the upper and lower DFmode values together. */
19208 if (TARGET_SSE3)
19209 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19210 else
19212 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19213 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19214 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19217 ix86_expand_vector_extract (false, target, fp_xmm, 0);
19220 /* Not used, but eases macroization of patterns. */
19221 void
19222 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19224 gcc_unreachable ();
19227 /* Convert an unsigned SImode value into a DFmode. Only currently used
19228 for SSE, but applicable anywhere. */
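/* Informal sketch: INPUT is re-biased by adding -0x80000000 (i.e. the
   unsigned value is reinterpreted as signed minus 2**31), converted with
   the signed SImode->DFmode conversion, and 0x1p31 is added back.  E.g.
   0xffffffff -> 0x7fffffff -> 2147483647.0 + 2147483648.0 = 4294967295.0.  */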
19230 void
19231 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19233 REAL_VALUE_TYPE TWO31r;
19234 rtx x, fp;
19236 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19237 NULL, 1, OPTAB_DIRECT);
19239 fp = gen_reg_rtx (DFmode);
19240 emit_insn (gen_floatsidf2 (fp, x));
19242 real_ldexp (&TWO31r, &dconst1, 31);
19243 x = const_double_from_real_value (TWO31r, DFmode);
19245 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19246 if (x != target)
19247 emit_move_insn (target, x);
19250 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19251 32-bit mode; otherwise we have a direct convert instruction. */
19253 void
19254 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19256 REAL_VALUE_TYPE TWO32r;
19257 rtx fp_lo, fp_hi, x;
19259 fp_lo = gen_reg_rtx (DFmode);
19260 fp_hi = gen_reg_rtx (DFmode);
19262 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19264 real_ldexp (&TWO32r, &dconst1, 32);
19265 x = const_double_from_real_value (TWO32r, DFmode);
19266 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19268 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19270 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19271 0, OPTAB_DIRECT);
19272 if (x != target)
19273 emit_move_insn (target, x);
19276 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19277 For x86_32, -mfpmath=sse, !optimize_size only. */
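/* Informal sketch: the input is split into its low and high 16-bit
   halves, each half is converted exactly to SFmode, and the result is
   recombined as hi * 0x1p16 + lo, e.g. 0xffffffff -> 65535.0 * 65536.0
   + 65535.0 = 4294967295.0 (subject to SFmode rounding).  */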
19278 void
19279 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19281 REAL_VALUE_TYPE ONE16r;
19282 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19284 real_ldexp (&ONE16r, &dconst1, 16);
19285 x = const_double_from_real_value (ONE16r, SFmode);
19286 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
19287 NULL, 0, OPTAB_DIRECT);
19288 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
19289 NULL, 0, OPTAB_DIRECT);
19290 fp_hi = gen_reg_rtx (SFmode);
19291 fp_lo = gen_reg_rtx (SFmode);
19292 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19293 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19294 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19295 0, OPTAB_DIRECT);
19296 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19297 0, OPTAB_DIRECT);
19298 if (!rtx_equal_p (target, fp_hi))
19299 emit_move_insn (target, fp_hi);
19302 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19303 a vector of unsigned ints VAL to vector of floats TARGET. */
19305 void
19306 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19308 rtx tmp[8];
19309 REAL_VALUE_TYPE TWO16r;
19310 machine_mode intmode = GET_MODE (val);
19311 machine_mode fltmode = GET_MODE (target);
19312 rtx (*cvt) (rtx, rtx);
19314 if (intmode == V4SImode)
19315 cvt = gen_floatv4siv4sf2;
19316 else
19317 cvt = gen_floatv8siv8sf2;
19318 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19319 tmp[0] = force_reg (intmode, tmp[0]);
19320 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19321 OPTAB_DIRECT);
19322 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19323 NULL_RTX, 1, OPTAB_DIRECT);
19324 tmp[3] = gen_reg_rtx (fltmode);
19325 emit_insn (cvt (tmp[3], tmp[1]));
19326 tmp[4] = gen_reg_rtx (fltmode);
19327 emit_insn (cvt (tmp[4], tmp[2]));
19328 real_ldexp (&TWO16r, &dconst1, 16);
19329 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19330 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19331 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19332 OPTAB_DIRECT);
19333 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19334 OPTAB_DIRECT);
19335 if (tmp[7] != target)
19336 emit_move_insn (target, tmp[7]);
19339 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19340 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19341 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19342 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
19345 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19347 REAL_VALUE_TYPE TWO31r;
19348 rtx two31r, tmp[4];
19349 machine_mode mode = GET_MODE (val);
19350 machine_mode scalarmode = GET_MODE_INNER (mode);
19351 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19352 rtx (*cmp) (rtx, rtx, rtx, rtx);
19353 int i;
19355 for (i = 0; i < 3; i++)
19356 tmp[i] = gen_reg_rtx (mode);
19357 real_ldexp (&TWO31r, &dconst1, 31);
19358 two31r = const_double_from_real_value (TWO31r, scalarmode);
19359 two31r = ix86_build_const_vector (mode, 1, two31r);
19360 two31r = force_reg (mode, two31r);
19361 switch (mode)
19363 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19364 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19365 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19366 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19367 default: gcc_unreachable ();
19369 tmp[3] = gen_rtx_LE (mode, two31r, val);
19370 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19371 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19372 0, OPTAB_DIRECT);
19373 if (intmode == V4SImode || TARGET_AVX2)
19374 *xorp = expand_simple_binop (intmode, ASHIFT,
19375 gen_lowpart (intmode, tmp[0]),
19376 GEN_INT (31), NULL_RTX, 0,
19377 OPTAB_DIRECT);
19378 else
19380 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
19381 two31 = ix86_build_const_vector (intmode, 1, two31);
19382 *xorp = expand_simple_binop (intmode, AND,
19383 gen_lowpart (intmode, tmp[0]),
19384 two31, NULL_RTX, 0,
19385 OPTAB_DIRECT);
19387 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19388 0, OPTAB_DIRECT);
19391 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19392 then replicate the value for all elements of the vector
19393 register. */
19396 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19398 int i, n_elt;
19399 rtvec v;
19400 machine_mode scalar_mode;
19402 switch (mode)
19404 case V64QImode:
19405 case V32QImode:
19406 case V16QImode:
19407 case V32HImode:
19408 case V16HImode:
19409 case V8HImode:
19410 case V16SImode:
19411 case V8SImode:
19412 case V4SImode:
19413 case V8DImode:
19414 case V4DImode:
19415 case V2DImode:
19416 gcc_assert (vect);
19417 case V16SFmode:
19418 case V8SFmode:
19419 case V4SFmode:
19420 case V8DFmode:
19421 case V4DFmode:
19422 case V2DFmode:
19423 n_elt = GET_MODE_NUNITS (mode);
19424 v = rtvec_alloc (n_elt);
19425 scalar_mode = GET_MODE_INNER (mode);
19427 RTVEC_ELT (v, 0) = value;
19429 for (i = 1; i < n_elt; ++i)
19430 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19432 return gen_rtx_CONST_VECTOR (mode, v);
19434 default:
19435 gcc_unreachable ();
19439 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19440 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19441 for an SSE register. If VECT is true, then replicate the mask for
19442 all elements of the vector register. If INVERT is true, then create
19443 a mask excluding the sign bit. */
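/* For example, for DFmode the mask is 0x8000000000000000 (or its
   complement 0x7fffffffffffffff when INVERT is true), replicated across
   the vector when VECT is true.  */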
19446 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19448 machine_mode vec_mode, imode;
19449 wide_int w;
19450 rtx mask, v;
19452 switch (mode)
19454 case V16SImode:
19455 case V16SFmode:
19456 case V8SImode:
19457 case V4SImode:
19458 case V8SFmode:
19459 case V4SFmode:
19460 vec_mode = mode;
19461 mode = GET_MODE_INNER (mode);
19462 imode = SImode;
19463 break;
19465 case V8DImode:
19466 case V4DImode:
19467 case V2DImode:
19468 case V8DFmode:
19469 case V4DFmode:
19470 case V2DFmode:
19471 vec_mode = mode;
19472 mode = GET_MODE_INNER (mode);
19473 imode = DImode;
19474 break;
19476 case TImode:
19477 case TFmode:
19478 vec_mode = VOIDmode;
19479 imode = TImode;
19480 break;
19482 default:
19483 gcc_unreachable ();
19486 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (mode) - 1,
19487 GET_MODE_BITSIZE (mode));
19488 if (invert)
19489 w = wi::bit_not (w);
19491 /* Force this value into the low part of a fp vector constant. */
19492 mask = immed_wide_int_const (w, imode);
19493 mask = gen_lowpart (mode, mask);
19495 if (vec_mode == VOIDmode)
19496 return force_reg (mode, mask);
19498 v = ix86_build_const_vector (vec_mode, vect, mask);
19499 return force_reg (vec_mode, v);
19502 /* Generate code for floating point ABS or NEG. */
19504 void
19505 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19506 rtx operands[])
19508 rtx mask, set, dst, src;
19509 bool use_sse = false;
19510 bool vector_mode = VECTOR_MODE_P (mode);
19511 machine_mode vmode = mode;
19513 if (vector_mode)
19514 use_sse = true;
19515 else if (mode == TFmode)
19516 use_sse = true;
19517 else if (TARGET_SSE_MATH)
19519 use_sse = SSE_FLOAT_MODE_P (mode);
19520 if (mode == SFmode)
19521 vmode = V4SFmode;
19522 else if (mode == DFmode)
19523 vmode = V2DFmode;
19526 /* NEG and ABS performed with SSE use bitwise mask operations.
19527 Create the appropriate mask now. */
19528 if (use_sse)
19529 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19530 else
19531 mask = NULL_RTX;
19533 dst = operands[0];
19534 src = operands[1];
19536 set = gen_rtx_fmt_e (code, mode, src);
19537 set = gen_rtx_SET (VOIDmode, dst, set);
19539 if (mask)
19541 rtx use, clob;
19542 rtvec par;
19544 use = gen_rtx_USE (VOIDmode, mask);
19545 if (vector_mode)
19546 par = gen_rtvec (2, set, use);
19547 else
19549 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19550 par = gen_rtvec (3, set, use, clob);
19552 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19554 else
19555 emit_insn (set);
19558 /* Expand a copysign operation. Special case operand 0 being a constant. */
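/* That is, operands[0] = copysign (operands[1], operands[2]): the result
   takes the magnitude of operands[1] and the sign of operands[2]; "op0"
   below refers to operands[1].  */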
19560 void
19561 ix86_expand_copysign (rtx operands[])
19563 machine_mode mode, vmode;
19564 rtx dest, op0, op1, mask, nmask;
19566 dest = operands[0];
19567 op0 = operands[1];
19568 op1 = operands[2];
19570 mode = GET_MODE (dest);
19572 if (mode == SFmode)
19573 vmode = V4SFmode;
19574 else if (mode == DFmode)
19575 vmode = V2DFmode;
19576 else
19577 vmode = mode;
19579 if (CONST_DOUBLE_P (op0))
19581 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19583 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19584 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19586 if (mode == SFmode || mode == DFmode)
19588 if (op0 == CONST0_RTX (mode))
19589 op0 = CONST0_RTX (vmode);
19590 else
19592 rtx v = ix86_build_const_vector (vmode, false, op0);
19594 op0 = force_reg (vmode, v);
19597 else if (op0 != CONST0_RTX (mode))
19598 op0 = force_reg (mode, op0);
19600 mask = ix86_build_signbit_mask (vmode, 0, 0);
19602 if (mode == SFmode)
19603 copysign_insn = gen_copysignsf3_const;
19604 else if (mode == DFmode)
19605 copysign_insn = gen_copysigndf3_const;
19606 else
19607 copysign_insn = gen_copysigntf3_const;
19609 emit_insn (copysign_insn (dest, op0, op1, mask));
19611 else
19613 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19615 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19616 mask = ix86_build_signbit_mask (vmode, 0, 0);
19618 if (mode == SFmode)
19619 copysign_insn = gen_copysignsf3_var;
19620 else if (mode == DFmode)
19621 copysign_insn = gen_copysigndf3_var;
19622 else
19623 copysign_insn = gen_copysigntf3_var;
19625 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19629 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19630 be a constant, and so has already been expanded into a vector constant. */
19632 void
19633 ix86_split_copysign_const (rtx operands[])
19635 machine_mode mode, vmode;
19636 rtx dest, op0, mask, x;
19638 dest = operands[0];
19639 op0 = operands[1];
19640 mask = operands[3];
19642 mode = GET_MODE (dest);
19643 vmode = GET_MODE (mask);
19645 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19646 x = gen_rtx_AND (vmode, dest, mask);
19647 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19649 if (op0 != CONST0_RTX (vmode))
19651 x = gen_rtx_IOR (vmode, dest, op0);
19652 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19656 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19657 so we have to do two masks. */
19659 void
19660 ix86_split_copysign_var (rtx operands[])
19662 machine_mode mode, vmode;
19663 rtx dest, scratch, op0, op1, mask, nmask, x;
19665 dest = operands[0];
19666 scratch = operands[1];
19667 op0 = operands[2];
19668 op1 = operands[3];
19669 nmask = operands[4];
19670 mask = operands[5];
19672 mode = GET_MODE (dest);
19673 vmode = GET_MODE (mask);
19675 if (rtx_equal_p (op0, op1))
19677 /* Shouldn't happen often (it's useless, obviously), but when it does
19678 we'd generate incorrect code if we continue below. */
19679 emit_move_insn (dest, op0);
19680 return;
19683 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19685 gcc_assert (REGNO (op1) == REGNO (scratch));
19687 x = gen_rtx_AND (vmode, scratch, mask);
19688 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19690 dest = mask;
19691 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19692 x = gen_rtx_NOT (vmode, dest);
19693 x = gen_rtx_AND (vmode, x, op0);
19694 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19696 else
19698 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19700 x = gen_rtx_AND (vmode, scratch, mask);
19702 else /* alternative 2,4 */
19704 gcc_assert (REGNO (mask) == REGNO (scratch));
19705 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19706 x = gen_rtx_AND (vmode, scratch, op1);
19708 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19710 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19712 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19713 x = gen_rtx_AND (vmode, dest, nmask);
19715 else /* alternative 3,4 */
19717 gcc_assert (REGNO (nmask) == REGNO (dest));
19718 dest = nmask;
19719 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19720 x = gen_rtx_AND (vmode, dest, op0);
19722 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19725 x = gen_rtx_IOR (vmode, dest, scratch);
19726 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19729 /* Return TRUE or FALSE depending on whether the first SET in INSN
19730 has source and destination with matching CC modes, and whether the
19731 CC mode is at least as constrained as REQ_MODE. */
19733 bool
19734 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19736 rtx set;
19737 machine_mode set_mode;
19739 set = PATTERN (insn);
19740 if (GET_CODE (set) == PARALLEL)
19741 set = XVECEXP (set, 0, 0);
19742 gcc_assert (GET_CODE (set) == SET);
19743 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19745 set_mode = GET_MODE (SET_DEST (set));
19746 switch (set_mode)
19748 case CCNOmode:
19749 if (req_mode != CCNOmode
19750 && (req_mode != CCmode
19751 || XEXP (SET_SRC (set), 1) != const0_rtx))
19752 return false;
19753 break;
19754 case CCmode:
19755 if (req_mode == CCGCmode)
19756 return false;
19757 /* FALLTHRU */
19758 case CCGCmode:
19759 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19760 return false;
19761 /* FALLTHRU */
19762 case CCGOCmode:
19763 if (req_mode == CCZmode)
19764 return false;
19765 /* FALLTHRU */
19766 case CCZmode:
19767 break;
19769 case CCAmode:
19770 case CCCmode:
19771 case CCOmode:
19772 case CCSmode:
19773 if (set_mode != req_mode)
19774 return false;
19775 break;
19777 default:
19778 gcc_unreachable ();
19781 return GET_MODE (SET_SRC (set)) == set_mode;
19784 /* Generate insn patterns to do an integer compare of OPERANDS. */
19786 static rtx
19787 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19789 machine_mode cmpmode;
19790 rtx tmp, flags;
19792 cmpmode = SELECT_CC_MODE (code, op0, op1);
19793 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19795 /* This is very simple, but making the interface the same as in the
19796 FP case makes the rest of the code easier. */
19797 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19798 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19800 /* Return the test that should be put into the flags user, i.e.
19801 the bcc, scc, or cmov instruction. */
19802 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19805 /* Figure out whether to use ordered or unordered fp comparisons.
19806 Return the appropriate mode to use. */
19808 machine_mode
19809 ix86_fp_compare_mode (enum rtx_code)
19811 /* ??? In order to make all comparisons reversible, we do all comparisons
19812 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19813 all forms of trapping and nontrapping comparisons, we can make inequality
19814 comparisons trapping again, since it results in better code when using
19815 FCOM based compares. */
19816 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19819 machine_mode
19820 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19822 machine_mode mode = GET_MODE (op0);
19824 if (SCALAR_FLOAT_MODE_P (mode))
19826 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19827 return ix86_fp_compare_mode (code);
19830 switch (code)
19832 /* Only zero flag is needed. */
19833 case EQ: /* ZF=0 */
19834 case NE: /* ZF!=0 */
19835 return CCZmode;
19836 /* Codes needing carry flag. */
19837 case GEU: /* CF=0 */
19838 case LTU: /* CF=1 */
19839 /* Detect overflow checks. They need just the carry flag. */
19840 if (GET_CODE (op0) == PLUS
19841 && rtx_equal_p (op1, XEXP (op0, 0)))
19842 return CCCmode;
19843 else
19844 return CCmode;
19845 case GTU: /* CF=0 & ZF=0 */
19846 case LEU: /* CF=1 | ZF=1 */
19847 return CCmode;
19848 /* Codes possibly doable only with sign flag when
19849 comparing against zero. */
19850 case GE: /* SF=OF or SF=0 */
19851 case LT: /* SF<>OF or SF=1 */
19852 if (op1 == const0_rtx)
19853 return CCGOCmode;
19854 else
19855 /* For other cases the carry flag is not required. */
19856 return CCGCmode;
19857 /* Codes doable only with the sign flag when comparing
19858 against zero, but for which we lack a jump instruction,
19859 so we need to use relational tests against overflow,
19860 which thus needs to be zero. */
19861 case GT: /* ZF=0 & SF=OF */
19862 case LE: /* ZF=1 | SF<>OF */
19863 if (op1 == const0_rtx)
19864 return CCNOmode;
19865 else
19866 return CCGCmode;
19867 /* The strcmp pattern does (use flags) and combine may ask us for the
19868 proper mode. */
19869 case USE:
19870 return CCmode;
19871 default:
19872 gcc_unreachable ();
19876 /* Return the fixed registers used for condition codes. */
19878 static bool
19879 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19881 *p1 = FLAGS_REG;
19882 *p2 = FPSR_REG;
19883 return true;
19886 /* If two condition code modes are compatible, return a condition code
19887 mode which is compatible with both. Otherwise, return
19888 VOIDmode. */
19890 static machine_mode
19891 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
19893 if (m1 == m2)
19894 return m1;
19896 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19897 return VOIDmode;
19899 if ((m1 == CCGCmode && m2 == CCGOCmode)
19900 || (m1 == CCGOCmode && m2 == CCGCmode))
19901 return CCGCmode;
19903 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19904 return m2;
19905 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19906 return m1;
19908 switch (m1)
19910 default:
19911 gcc_unreachable ();
19913 case CCmode:
19914 case CCGCmode:
19915 case CCGOCmode:
19916 case CCNOmode:
19917 case CCAmode:
19918 case CCCmode:
19919 case CCOmode:
19920 case CCSmode:
19921 case CCZmode:
19922 switch (m2)
19924 default:
19925 return VOIDmode;
19927 case CCmode:
19928 case CCGCmode:
19929 case CCGOCmode:
19930 case CCNOmode:
19931 case CCAmode:
19932 case CCCmode:
19933 case CCOmode:
19934 case CCSmode:
19935 case CCZmode:
19936 return CCmode;
19939 case CCFPmode:
19940 case CCFPUmode:
19941 /* These are only compatible with themselves, which we already
19942 checked above. */
19943 return VOIDmode;
19948 /* Return a comparison we can do that is equivalent to
19949 swap_condition (code), apart possibly from orderedness.
19950 But never change orderedness if TARGET_IEEE_FP; return
19951 UNKNOWN in that case if necessary. */
19953 static enum rtx_code
19954 ix86_fp_swap_condition (enum rtx_code code)
19956 switch (code)
19958 case GT: /* GTU - CF=0 & ZF=0 */
19959 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19960 case GE: /* GEU - CF=0 */
19961 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19962 case UNLT: /* LTU - CF=1 */
19963 return TARGET_IEEE_FP ? UNKNOWN : GT;
19964 case UNLE: /* LEU - CF=1 | ZF=1 */
19965 return TARGET_IEEE_FP ? UNKNOWN : GE;
19966 default:
19967 return swap_condition (code);
19971 /* Return the cost of comparison CODE using the best strategy for performance.
19972 All of the following functions use the number of instructions as a cost metric.
19973 In the future this should be tweaked to compute bytes for optimize_size and
19974 to take into account the performance of various instructions on various CPUs. */
19976 static int
19977 ix86_fp_comparison_cost (enum rtx_code code)
19979 int arith_cost;
19981 /* The cost of code using bit-twiddling on %ah. */
19982 switch (code)
19984 case UNLE:
19985 case UNLT:
19986 case LTGT:
19987 case GT:
19988 case GE:
19989 case UNORDERED:
19990 case ORDERED:
19991 case UNEQ:
19992 arith_cost = 4;
19993 break;
19994 case LT:
19995 case NE:
19996 case EQ:
19997 case UNGE:
19998 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19999 break;
20000 case LE:
20001 case UNGT:
20002 arith_cost = TARGET_IEEE_FP ? 6 : 4;
20003 break;
20004 default:
20005 gcc_unreachable ();
20008 switch (ix86_fp_comparison_strategy (code))
20010 case IX86_FPCMP_COMI:
20011 return arith_cost > 4 ? 3 : 2;
20012 case IX86_FPCMP_SAHF:
20013 return arith_cost > 4 ? 4 : 3;
20014 default:
20015 return arith_cost;
20019 /* Return the strategy to use for floating-point comparisons. We assume that
20020 fcomi is always preferable where available, since that is also true when looking
20021 at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
20023 enum ix86_fpcmp_strategy
20024 ix86_fp_comparison_strategy (enum rtx_code)
20026 /* Do fcomi/sahf based test when profitable. */
20028 if (TARGET_CMOVE)
20029 return IX86_FPCMP_COMI;
20031 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20032 return IX86_FPCMP_SAHF;
20034 return IX86_FPCMP_ARITH;
20037 /* Swap, force into registers, or otherwise massage the two operands
20038 to a fp comparison. The operands are updated in place; the new
20039 comparison code is returned. */
20041 static enum rtx_code
20042 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20044 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20045 rtx op0 = *pop0, op1 = *pop1;
20046 machine_mode op_mode = GET_MODE (op0);
20047 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20049 /* All of the unordered compare instructions only work on registers.
20050 The same is true of the fcomi compare instructions. The XFmode
20051 compare instructions require registers except when comparing
20052 against zero or when converting operand 1 from fixed point to
20053 floating point. */
20055 if (!is_sse
20056 && (fpcmp_mode == CCFPUmode
20057 || (op_mode == XFmode
20058 && ! (standard_80387_constant_p (op0) == 1
20059 || standard_80387_constant_p (op1) == 1)
20060 && GET_CODE (op1) != FLOAT)
20061 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20063 op0 = force_reg (op_mode, op0);
20064 op1 = force_reg (op_mode, op1);
20066 else
20068 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20069 things around if they appear profitable, otherwise force op0
20070 into a register. */
20072 if (standard_80387_constant_p (op0) == 0
20073 || (MEM_P (op0)
20074 && ! (standard_80387_constant_p (op1) == 0
20075 || MEM_P (op1))))
20077 enum rtx_code new_code = ix86_fp_swap_condition (code);
20078 if (new_code != UNKNOWN)
20080 std::swap (op0, op1);
20081 code = new_code;
20085 if (!REG_P (op0))
20086 op0 = force_reg (op_mode, op0);
20088 if (CONSTANT_P (op1))
20090 int tmp = standard_80387_constant_p (op1);
20091 if (tmp == 0)
20092 op1 = validize_mem (force_const_mem (op_mode, op1));
20093 else if (tmp == 1)
20095 if (TARGET_CMOVE)
20096 op1 = force_reg (op_mode, op1);
20098 else
20099 op1 = force_reg (op_mode, op1);
20103 /* Try to rearrange the comparison to make it cheaper. */
20104 if (ix86_fp_comparison_cost (code)
20105 > ix86_fp_comparison_cost (swap_condition (code))
20106 && (REG_P (op1) || can_create_pseudo_p ()))
20108 std::swap (op0, op1);
20109 code = swap_condition (code);
20110 if (!REG_P (op0))
20111 op0 = force_reg (op_mode, op0);
20114 *pop0 = op0;
20115 *pop1 = op1;
20116 return code;
20119 /* Convert the comparison codes we use to represent FP comparisons to the
20120 integer codes that will result in a proper branch. Return UNKNOWN if no such
20121 code is available. */
20123 enum rtx_code
20124 ix86_fp_compare_code_to_integer (enum rtx_code code)
20126 switch (code)
20128 case GT:
20129 return GTU;
20130 case GE:
20131 return GEU;
20132 case ORDERED:
20133 case UNORDERED:
20134 return code;
20135 break;
20136 case UNEQ:
20137 return EQ;
20138 break;
20139 case UNLT:
20140 return LTU;
20141 break;
20142 case UNLE:
20143 return LEU;
20144 break;
20145 case LTGT:
20146 return NE;
20147 break;
20148 default:
20149 return UNKNOWN;
20153 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20155 static rtx
20156 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20158 machine_mode fpcmp_mode, intcmp_mode;
20159 rtx tmp, tmp2;
20161 fpcmp_mode = ix86_fp_compare_mode (code);
20162 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20164 /* Do fcomi/sahf based test when profitable. */
20165 switch (ix86_fp_comparison_strategy (code))
20167 case IX86_FPCMP_COMI:
20168 intcmp_mode = fpcmp_mode;
20169 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20170 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20171 tmp);
20172 emit_insn (tmp);
20173 break;
20175 case IX86_FPCMP_SAHF:
20176 intcmp_mode = fpcmp_mode;
20177 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20178 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20179 tmp);
20181 if (!scratch)
20182 scratch = gen_reg_rtx (HImode);
20183 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20184 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20185 break;
20187 case IX86_FPCMP_ARITH:
20188 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20189 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20190 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20191 if (!scratch)
20192 scratch = gen_reg_rtx (HImode);
20193 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
20195 /* In the unordered case, we have to check C2 for NaN's, which
20196 doesn't happen to work out to anything nice combination-wise.
20197 So do some bit twiddling on the value we've got in AH to come
20198 up with an appropriate set of condition codes. */
20200 intcmp_mode = CCNOmode;
20201 switch (code)
20203 case GT:
20204 case UNGT:
20205 if (code == GT || !TARGET_IEEE_FP)
20207 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20208 code = EQ;
20210 else
20212 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20213 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20214 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20215 intcmp_mode = CCmode;
20216 code = GEU;
20218 break;
20219 case LT:
20220 case UNLT:
20221 if (code == LT && TARGET_IEEE_FP)
20223 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20224 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20225 intcmp_mode = CCmode;
20226 code = EQ;
20228 else
20230 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20231 code = NE;
20233 break;
20234 case GE:
20235 case UNGE:
20236 if (code == GE || !TARGET_IEEE_FP)
20238 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20239 code = EQ;
20241 else
20243 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20244 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20245 code = NE;
20247 break;
20248 case LE:
20249 case UNLE:
20250 if (code == LE && TARGET_IEEE_FP)
20252 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20253 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20254 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20255 intcmp_mode = CCmode;
20256 code = LTU;
20258 else
20260 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20261 code = NE;
20263 break;
20264 case EQ:
20265 case UNEQ:
20266 if (code == EQ && TARGET_IEEE_FP)
20268 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20269 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20270 intcmp_mode = CCmode;
20271 code = EQ;
20273 else
20275 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20276 code = NE;
20278 break;
20279 case NE:
20280 case LTGT:
20281 if (code == NE && TARGET_IEEE_FP)
20283 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20284 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20285 GEN_INT (0x40)));
20286 code = NE;
20288 else
20290 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20291 code = EQ;
20293 break;
20295 case UNORDERED:
20296 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20297 code = NE;
20298 break;
20299 case ORDERED:
20300 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20301 code = EQ;
20302 break;
20304 default:
20305 gcc_unreachable ();
20307 break;
20309 default:
20310 gcc_unreachable();
20313 /* Return the test that should be put into the flags user, i.e.
20314 the bcc, scc, or cmov instruction. */
20315 return gen_rtx_fmt_ee (code, VOIDmode,
20316 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20317 const0_rtx);
20320 static rtx
20321 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20323 rtx ret;
20325 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20326 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20328 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20330 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20331 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20333 else
20334 ret = ix86_expand_int_compare (code, op0, op1);
20336 return ret;
20339 void
20340 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20342 machine_mode mode = GET_MODE (op0);
20343 rtx tmp;
20345 switch (mode)
20347 case SFmode:
20348 case DFmode:
20349 case XFmode:
20350 case QImode:
20351 case HImode:
20352 case SImode:
20353 simple:
20354 tmp = ix86_expand_compare (code, op0, op1);
20355 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20356 gen_rtx_LABEL_REF (VOIDmode, label),
20357 pc_rtx);
20358 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20359 return;
20361 case DImode:
20362 if (TARGET_64BIT)
20363 goto simple;
20364 case TImode:
20365 /* Expand DImode branch into multiple compare+branch. */
20367 rtx lo[2], hi[2];
20368 rtx_code_label *label2;
20369 enum rtx_code code1, code2, code3;
20370 machine_mode submode;
20372 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20374 std::swap (op0, op1);
20375 code = swap_condition (code);
20378 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20379 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20381 submode = mode == DImode ? SImode : DImode;
20383 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20384 avoid two branches. This costs one extra insn, so disable when
20385 optimizing for size. */
20387 if ((code == EQ || code == NE)
20388 && (!optimize_insn_for_size_p ()
20389 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20391 rtx xor0, xor1;
20393 xor1 = hi[0];
20394 if (hi[1] != const0_rtx)
20395 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20396 NULL_RTX, 0, OPTAB_WIDEN);
20398 xor0 = lo[0];
20399 if (lo[1] != const0_rtx)
20400 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20401 NULL_RTX, 0, OPTAB_WIDEN);
20403 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20404 NULL_RTX, 0, OPTAB_WIDEN);
20406 ix86_expand_branch (code, tmp, const0_rtx, label);
20407 return;
20410 /* Otherwise, if we are doing less-than or greater-or-equal-than,
20411 op1 is a constant and the low word is zero, then we can just
20412 examine the high word. Similarly for low word -1 and
20413 less-or-equal-than or greater-than. */
20415 if (CONST_INT_P (hi[1]))
20416 switch (code)
20418 case LT: case LTU: case GE: case GEU:
20419 if (lo[1] == const0_rtx)
20421 ix86_expand_branch (code, hi[0], hi[1], label);
20422 return;
20424 break;
20425 case LE: case LEU: case GT: case GTU:
20426 if (lo[1] == constm1_rtx)
20428 ix86_expand_branch (code, hi[0], hi[1], label);
20429 return;
20431 break;
20432 default:
20433 break;
20436 /* Otherwise, we need two or three jumps. */
20438 label2 = gen_label_rtx ();
20440 code1 = code;
20441 code2 = swap_condition (code);
20442 code3 = unsigned_condition (code);
20444 switch (code)
20446 case LT: case GT: case LTU: case GTU:
20447 break;
20449 case LE: code1 = LT; code2 = GT; break;
20450 case GE: code1 = GT; code2 = LT; break;
20451 case LEU: code1 = LTU; code2 = GTU; break;
20452 case GEU: code1 = GTU; code2 = LTU; break;
20454 case EQ: code1 = UNKNOWN; code2 = NE; break;
20455 case NE: code2 = UNKNOWN; break;
20457 default:
20458 gcc_unreachable ();
20462 * a < b =>
20463 * if (hi(a) < hi(b)) goto true;
20464 * if (hi(a) > hi(b)) goto false;
20465 * if (lo(a) < lo(b)) goto true;
20466 * false:
20469 if (code1 != UNKNOWN)
20470 ix86_expand_branch (code1, hi[0], hi[1], label);
20471 if (code2 != UNKNOWN)
20472 ix86_expand_branch (code2, hi[0], hi[1], label2);
20474 ix86_expand_branch (code3, lo[0], lo[1], label);
20476 if (code2 != UNKNOWN)
20477 emit_label (label2);
20478 return;
20481 default:
20482 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20483 goto simple;
20487 /* Split branch based on floating point condition. */
20488 void
20489 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20490 rtx target1, rtx target2, rtx tmp)
20492 rtx condition;
20493 rtx i;
20495 if (target2 != pc_rtx)
20497 std::swap (target1, target2);
20498 code = reverse_condition_maybe_unordered (code);
20501 condition = ix86_expand_fp_compare (code, op1, op2,
20502 tmp);
20504 i = emit_jump_insn (gen_rtx_SET
20505 (VOIDmode, pc_rtx,
20506 gen_rtx_IF_THEN_ELSE (VOIDmode,
20507 condition, target1, target2)));
20508 if (split_branch_probability >= 0)
20509 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20512 void
20513 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20515 rtx ret;
20517 gcc_assert (GET_MODE (dest) == QImode);
20519 ret = ix86_expand_compare (code, op0, op1);
20520 PUT_MODE (ret, QImode);
20521 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20524 /* Expand comparison setting or clearing carry flag. Return true when
20525 successful and set pop for the operation. */
20526 static bool
20527 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20529 machine_mode mode =
20530 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20532 /* Do not handle double-mode compares that go through special path. */
20533 if (mode == (TARGET_64BIT ? TImode : DImode))
20534 return false;
20536 if (SCALAR_FLOAT_MODE_P (mode))
20538 rtx compare_op;
20539 rtx_insn *compare_seq;
20541 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20543 /* Shortcut: the following common codes never translate
20544 into carry flag compares. */
20545 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20546 || code == ORDERED || code == UNORDERED)
20547 return false;
20549 /* These comparisons require the zero flag; swap the operands so they won't. */
20550 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20551 && !TARGET_IEEE_FP)
20553 std::swap (op0, op1);
20554 code = swap_condition (code);
20557 /* Try to expand the comparison and verify that we end up with
20558 a carry-flag-based comparison. This fails to be true only when
20559 we decide to expand the comparison using arithmetic, which is
20560 not a common scenario. */
20561 start_sequence ();
20562 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20563 compare_seq = get_insns ();
20564 end_sequence ();
20566 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20567 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20568 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20569 else
20570 code = GET_CODE (compare_op);
20572 if (code != LTU && code != GEU)
20573 return false;
20575 emit_insn (compare_seq);
20576 *pop = compare_op;
20577 return true;
20580 if (!INTEGRAL_MODE_P (mode))
20581 return false;
20583 switch (code)
20585 case LTU:
20586 case GEU:
20587 break;
20589 /* Convert a==0 into (unsigned)a<1. */
20590 case EQ:
20591 case NE:
20592 if (op1 != const0_rtx)
20593 return false;
20594 op1 = const1_rtx;
20595 code = (code == EQ ? LTU : GEU);
20596 break;
20598 /* Convert a>b into b<a or a>=b+1. */
20599 case GTU:
20600 case LEU:
20601 if (CONST_INT_P (op1))
20603 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20604 /* Bail out on overflow. We could still swap the operands, but that
20605 would force loading of the constant into a register. */
20606 if (op1 == const0_rtx
20607 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20608 return false;
20609 code = (code == GTU ? GEU : LTU);
20611 else
20613 std::swap (op0, op1);
20614 code = (code == GTU ? LTU : GEU);
20616 break;
20618 /* Convert a>=0 into (unsigned)a<0x80000000. */
20619 case LT:
20620 case GE:
20621 if (mode == DImode || op1 != const0_rtx)
20622 return false;
20623 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20624 code = (code == LT ? GEU : LTU);
20625 break;
20626 case LE:
20627 case GT:
20628 if (mode == DImode || op1 != constm1_rtx)
20629 return false;
20630 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20631 code = (code == LE ? GEU : LTU);
20632 break;
20634 default:
20635 return false;
20637 /* Swapping operands may cause a constant to appear as the first operand. */
20638 if (!nonimmediate_operand (op0, VOIDmode))
20640 if (!can_create_pseudo_p ())
20641 return false;
20642 op0 = force_reg (mode, op0);
20644 *pop = ix86_expand_compare (code, op0, op1);
20645 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20646 return true;
20649 bool
20650 ix86_expand_int_movcc (rtx operands[])
20652 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20653 rtx_insn *compare_seq;
20654 rtx compare_op;
20655 machine_mode mode = GET_MODE (operands[0]);
20656 bool sign_bit_compare_p = false;
20657 rtx op0 = XEXP (operands[1], 0);
20658 rtx op1 = XEXP (operands[1], 1);
20660 if (GET_MODE (op0) == TImode
20661 || (GET_MODE (op0) == DImode
20662 && !TARGET_64BIT))
20663 return false;
20665 start_sequence ();
20666 compare_op = ix86_expand_compare (code, op0, op1);
20667 compare_seq = get_insns ();
20668 end_sequence ();
20670 compare_code = GET_CODE (compare_op);
20672 if ((op1 == const0_rtx && (code == GE || code == LT))
20673 || (op1 == constm1_rtx && (code == GT || code == LE)))
20674 sign_bit_compare_p = true;
20676 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20677 HImode insns, we'd be swallowed in word prefix ops. */
20679 if ((mode != HImode || TARGET_FAST_PREFIX)
20680 && (mode != (TARGET_64BIT ? TImode : DImode))
20681 && CONST_INT_P (operands[2])
20682 && CONST_INT_P (operands[3]))
20684 rtx out = operands[0];
20685 HOST_WIDE_INT ct = INTVAL (operands[2]);
20686 HOST_WIDE_INT cf = INTVAL (operands[3]);
20687 HOST_WIDE_INT diff;
20689 diff = ct - cf;
20690       /* Sign bit compares are better done using shifts than by using
20691 	 sbb.  */
20692 if (sign_bit_compare_p
20693 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20695 /* Detect overlap between destination and compare sources. */
20696 rtx tmp = out;
20698 if (!sign_bit_compare_p)
20700 rtx flags;
20701 bool fpcmp = false;
20703 compare_code = GET_CODE (compare_op);
20705 flags = XEXP (compare_op, 0);
20707 if (GET_MODE (flags) == CCFPmode
20708 || GET_MODE (flags) == CCFPUmode)
20710 fpcmp = true;
20711 compare_code
20712 = ix86_fp_compare_code_to_integer (compare_code);
20715 /* To simplify rest of code, restrict to the GEU case. */
20716 if (compare_code == LTU)
20718 std::swap (ct, cf);
20719 compare_code = reverse_condition (compare_code);
20720 code = reverse_condition (code);
20722 else
20724 if (fpcmp)
20725 PUT_CODE (compare_op,
20726 reverse_condition_maybe_unordered
20727 (GET_CODE (compare_op)));
20728 else
20729 PUT_CODE (compare_op,
20730 reverse_condition (GET_CODE (compare_op)));
20732 diff = ct - cf;
20734 if (reg_overlap_mentioned_p (out, op0)
20735 || reg_overlap_mentioned_p (out, op1))
20736 tmp = gen_reg_rtx (mode);
20738 if (mode == DImode)
20739 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20740 else
20741 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20742 flags, compare_op));
20744 else
20746 if (code == GT || code == GE)
20747 code = reverse_condition (code);
20748 else
20750 std::swap (ct, cf);
20751 diff = ct - cf;
20753 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20756 if (diff == 1)
20759 * cmpl op0,op1
20760 * sbbl dest,dest
20761 * [addl dest, ct]
20763 * Size 5 - 8.
20765 if (ct)
20766 tmp = expand_simple_binop (mode, PLUS,
20767 tmp, GEN_INT (ct),
20768 copy_rtx (tmp), 1, OPTAB_DIRECT);
20770 else if (cf == -1)
20773 * cmpl op0,op1
20774 * sbbl dest,dest
20775 * orl $ct, dest
20777 * Size 8.
20779 tmp = expand_simple_binop (mode, IOR,
20780 tmp, GEN_INT (ct),
20781 copy_rtx (tmp), 1, OPTAB_DIRECT);
20783 else if (diff == -1 && ct)
20786 * cmpl op0,op1
20787 * sbbl dest,dest
20788 * notl dest
20789 * [addl dest, cf]
20791 * Size 8 - 11.
20793 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20794 if (cf)
20795 tmp = expand_simple_binop (mode, PLUS,
20796 copy_rtx (tmp), GEN_INT (cf),
20797 copy_rtx (tmp), 1, OPTAB_DIRECT);
20799 else
20802 * cmpl op0,op1
20803 * sbbl dest,dest
20804 * [notl dest]
20805 * andl cf - ct, dest
20806 * [addl dest, ct]
20808 * Size 8 - 11.
20811 if (cf == 0)
20813 cf = ct;
20814 ct = 0;
20815 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20818 tmp = expand_simple_binop (mode, AND,
20819 copy_rtx (tmp),
20820 gen_int_mode (cf - ct, mode),
20821 copy_rtx (tmp), 1, OPTAB_DIRECT);
20822 if (ct)
20823 tmp = expand_simple_binop (mode, PLUS,
20824 copy_rtx (tmp), GEN_INT (ct),
20825 copy_rtx (tmp), 1, OPTAB_DIRECT);
20828 if (!rtx_equal_p (tmp, out))
20829 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20831 return true;
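/* A rough C-level sketch of the sbb-based path above (illustrative only):
   once the comparison is in carry-flag form and the constants have been
   swapped as needed,

     mask = borrow ? -1 : 0;             /* cmp op0,op1 ; sbb dest,dest  */
     dest = (mask & (cf - ct)) + ct;     /* [not] / and / [add]          */

   selects between the two constants without a branch; the diff == 1,
   cf == -1 and diff == -1 special cases above are just shorter forms of
   the same idea.  */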
20834 if (diff < 0)
20836 machine_mode cmp_mode = GET_MODE (op0);
20837 enum rtx_code new_code;
20839 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20841 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20843 /* We may be reversing unordered compare to normal compare, that
20844 is not valid in general (we may convert non-trapping condition
20845 to trapping one), however on i386 we currently emit all
20846 comparisons unordered. */
20847 new_code = reverse_condition_maybe_unordered (code);
20849 else
20850 new_code = ix86_reverse_condition (code, cmp_mode);
20851 if (new_code != UNKNOWN)
20853 std::swap (ct, cf);
20854 diff = -diff;
20855 code = new_code;
20859 compare_code = UNKNOWN;
20860 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20861 && CONST_INT_P (op1))
20863 if (op1 == const0_rtx
20864 && (code == LT || code == GE))
20865 compare_code = code;
20866 else if (op1 == constm1_rtx)
20868 if (code == LE)
20869 compare_code = LT;
20870 else if (code == GT)
20871 compare_code = GE;
20875 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20876 if (compare_code != UNKNOWN
20877 && GET_MODE (op0) == GET_MODE (out)
20878 && (cf == -1 || ct == -1))
20880 /* If lea code below could be used, only optimize
20881 if it results in a 2 insn sequence. */
20883 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20884 || diff == 3 || diff == 5 || diff == 9)
20885 || (compare_code == LT && ct == -1)
20886 || (compare_code == GE && cf == -1))
20889 * notl op1 (if necessary)
20890 * sarl $31, op1
20891 * orl cf, op1
20893 if (ct != -1)
20895 cf = ct;
20896 ct = -1;
20897 code = reverse_condition (code);
20900 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20902 out = expand_simple_binop (mode, IOR,
20903 out, GEN_INT (cf),
20904 out, 1, OPTAB_DIRECT);
20905 if (out != operands[0])
20906 emit_move_insn (operands[0], out);
20908 return true;
20913 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20914 || diff == 3 || diff == 5 || diff == 9)
20915 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20916 && (mode != DImode
20917 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20920 * xorl dest,dest
20921 * cmpl op1,op2
20922 * setcc dest
20923 * lea cf(dest*(ct-cf)),dest
20925 * Size 14.
20927 * This also catches the degenerate setcc-only case.
20930 rtx tmp;
20931 int nops;
20933 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20935 nops = 0;
20936 /* On x86_64 the lea instruction operates on Pmode, so we need
20937 	     to get the arithmetic done in the proper mode to match.  */
20938 if (diff == 1)
20939 tmp = copy_rtx (out);
20940 else
20942 rtx out1;
20943 out1 = copy_rtx (out);
20944 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20945 nops++;
20946 if (diff & 1)
20948 tmp = gen_rtx_PLUS (mode, tmp, out1);
20949 nops++;
20952 if (cf != 0)
20954 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20955 nops++;
20957 if (!rtx_equal_p (tmp, out))
20959 if (nops == 1)
20960 out = force_operand (tmp, copy_rtx (out));
20961 else
20962 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20964 if (!rtx_equal_p (out, operands[0]))
20965 emit_move_insn (operands[0], copy_rtx (out));
20967 return true;
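/* Worked example for the setcc+lea path above (illustrative, not from the
   original sources): for dest = (a < b) ? 5 : 2 the difference ct - cf is 3,
   so after setcc leaves 0 or 1 in dest the result is formed as
   dest = dest * 3 + 2, i.e. a single "lea 2(dest,dest,2), dest".  */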
20971 * General case: Jumpful:
20972 * xorl dest,dest cmpl op1, op2
20973 * cmpl op1, op2 movl ct, dest
20974 * setcc dest jcc 1f
20975 * decl dest movl cf, dest
20976 * andl (cf-ct),dest 1:
20977 * addl ct,dest
20979 * Size 20. Size 14.
20981 * This is reasonably steep, but branch mispredict costs are
20982 * high on modern cpus, so consider failing only if optimizing
20983 * for space.
20986 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20987 && BRANCH_COST (optimize_insn_for_speed_p (),
20988 false) >= 2)
20990 if (cf == 0)
20992 machine_mode cmp_mode = GET_MODE (op0);
20993 enum rtx_code new_code;
20995 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20997 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20999 /* We may be reversing unordered compare to normal compare,
21000 that is not valid in general (we may convert non-trapping
21001 condition to trapping one), however on i386 we currently
21002 emit all comparisons unordered. */
21003 new_code = reverse_condition_maybe_unordered (code);
21005 else
21007 new_code = ix86_reverse_condition (code, cmp_mode);
21008 if (compare_code != UNKNOWN && new_code != UNKNOWN)
21009 compare_code = reverse_condition (compare_code);
21012 if (new_code != UNKNOWN)
21014 cf = ct;
21015 ct = 0;
21016 code = new_code;
21020 if (compare_code != UNKNOWN)
21022 /* notl op1 (if needed)
21023 sarl $31, op1
21024 andl (cf-ct), op1
21025 addl ct, op1
21027 For x < 0 (resp. x <= -1) there will be no notl,
21028 so if possible swap the constants to get rid of the
21029 complement.
21030 True/false will be -1/0 while code below (store flag
21031 followed by decrement) is 0/-1, so the constants need
21032 to be exchanged once more. */
21034 if (compare_code == GE || !cf)
21036 code = reverse_condition (code);
21037 compare_code = LT;
21039 else
21040 std::swap (ct, cf);
21042 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21044 else
21046 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21048 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21049 constm1_rtx,
21050 copy_rtx (out), 1, OPTAB_DIRECT);
21053 out = expand_simple_binop (mode, AND, copy_rtx (out),
21054 gen_int_mode (cf - ct, mode),
21055 copy_rtx (out), 1, OPTAB_DIRECT);
21056 if (ct)
21057 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21058 copy_rtx (out), 1, OPTAB_DIRECT);
21059 if (!rtx_equal_p (out, operands[0]))
21060 emit_move_insn (operands[0], copy_rtx (out));
21062 return true;
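/* C-level sketch of the jumpless "general case" sequence above
   (illustrative only):

     t    = cond ? 1 : 0;            /* xor ; cmp ; setcc          */
     t    = t - 1;                   /* dec: 0 if cond, -1 if not  */
     dest = (t & (cf - ct)) + ct;    /* and ; add                  */

   which yields CT when the condition holds and CF otherwise.  */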
21066 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21068 /* Try a few things more with specific constants and a variable. */
21070 optab op;
21071 rtx var, orig_out, out, tmp;
21073 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21074 return false;
21076 /* If one of the two operands is an interesting constant, load a
21077 constant with the above and mask it in with a logical operation. */
21079 if (CONST_INT_P (operands[2]))
21081 var = operands[3];
21082 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21083 operands[3] = constm1_rtx, op = and_optab;
21084 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21085 operands[3] = const0_rtx, op = ior_optab;
21086 else
21087 return false;
21089 else if (CONST_INT_P (operands[3]))
21091 var = operands[2];
21092 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21093 operands[2] = constm1_rtx, op = and_optab;
21094 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
21095 operands[2] = const0_rtx, op = ior_optab;
21096 else
21097 return false;
21099 else
21100 return false;
21102 orig_out = operands[0];
21103 tmp = gen_reg_rtx (mode);
21104 operands[0] = tmp;
21106 /* Recurse to get the constant loaded. */
21107 if (ix86_expand_int_movcc (operands) == 0)
21108 return false;
21110 /* Mask in the interesting variable. */
21111 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21112 OPTAB_WIDEN);
21113 if (!rtx_equal_p (out, orig_out))
21114 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21116 return true;
21120 * For comparison with above,
21122 * movl cf,dest
21123 * movl ct,tmp
21124 * cmpl op1,op2
21125 * cmovcc tmp,dest
21127 * Size 15.
21130 if (! nonimmediate_operand (operands[2], mode))
21131 operands[2] = force_reg (mode, operands[2]);
21132 if (! nonimmediate_operand (operands[3], mode))
21133 operands[3] = force_reg (mode, operands[3]);
21135 if (! register_operand (operands[2], VOIDmode)
21136 && (mode == QImode
21137 || ! register_operand (operands[3], VOIDmode)))
21138 operands[2] = force_reg (mode, operands[2]);
21140 if (mode == QImode
21141 && ! register_operand (operands[3], VOIDmode))
21142 operands[3] = force_reg (mode, operands[3]);
21144 emit_insn (compare_seq);
21145 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21146 gen_rtx_IF_THEN_ELSE (mode,
21147 compare_op, operands[2],
21148 operands[3])));
21149 return true;
21152 /* Swap, force into registers, or otherwise massage the two operands
21153 to an sse comparison with a mask result. Thus we differ a bit from
21154 ix86_prepare_fp_compare_args which expects to produce a flags result.
21156 The DEST operand exists to help determine whether to commute commutative
21157 operators. The POP0/POP1 operands are updated in place. The new
21158 comparison code is returned, or UNKNOWN if not implementable. */
21160 static enum rtx_code
21161 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21162 rtx *pop0, rtx *pop1)
21164 switch (code)
21166 case LTGT:
21167 case UNEQ:
21168 /* AVX supports all the needed comparisons. */
21169 if (TARGET_AVX)
21170 break;
21171 /* We have no LTGT as an operator. We could implement it with
21172 NE & ORDERED, but this requires an extra temporary. It's
21173 not clear that it's worth it. */
21174 return UNKNOWN;
21176 case LT:
21177 case LE:
21178 case UNGT:
21179 case UNGE:
21180 /* These are supported directly. */
21181 break;
21183 case EQ:
21184 case NE:
21185 case UNORDERED:
21186 case ORDERED:
21187 /* AVX has 3 operand comparisons, no need to swap anything. */
21188 if (TARGET_AVX)
21189 break;
21190 /* For commutative operators, try to canonicalize the destination
21191 operand to be first in the comparison - this helps reload to
21192 avoid extra moves. */
21193 if (!dest || !rtx_equal_p (dest, *pop1))
21194 break;
21195 /* FALLTHRU */
21197 case GE:
21198 case GT:
21199 case UNLE:
21200 case UNLT:
21201 /* These are not supported directly before AVX, and furthermore
21202 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21203 comparison operands to transform into something that is
21204 supported. */
21205 std::swap (*pop0, *pop1);
21206 code = swap_condition (code);
21207 break;
21209 default:
21210 gcc_unreachable ();
21213 return code;
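/* Illustrative example, not from the original sources: before AVX the SSE
   compares have no direct GE/GT/UNLE/UNLT forms, so a request such as
   "a > b" comes back from this function as "b < a", with *pop0 and *pop1
   swapped and the condition code adjusted accordingly.  */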
21216 /* Detect conditional moves that exactly match min/max operational
21217 semantics. Note that this is IEEE safe, as long as we don't
21218 interchange the operands.
21220 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21221 and TRUE if the operation is successful and instructions are emitted. */
21223 static bool
21224 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21225 rtx cmp_op1, rtx if_true, rtx if_false)
21227 machine_mode mode;
21228 bool is_min;
21229 rtx tmp;
21231 if (code == LT)
21233 else if (code == UNGE)
21234 std::swap (if_true, if_false);
21235 else
21236 return false;
21238 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21239 is_min = true;
21240 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21241 is_min = false;
21242 else
21243 return false;
21245 mode = GET_MODE (dest);
21247 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21248 but MODE may be a vector mode and thus not appropriate. */
21249 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21251 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21252 rtvec v;
21254 if_true = force_reg (mode, if_true);
21255 v = gen_rtvec (2, if_true, if_false);
21256 tmp = gen_rtx_UNSPEC (mode, v, u);
21258 else
21260 code = is_min ? SMIN : SMAX;
21261 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21264 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
21265 return true;
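/* Illustrative example, not from the original sources: a conditional move
   of the shape dest = (a < b) ? a : b matches the LT / if_true == cmp_op0 /
   if_false == cmp_op1 pattern above and is recognized as a minimum; it is
   emitted either as the IEEE-safe UNSPEC_IEEE_MIN form or, when unsafe FP
   math permits, as a plain SMIN rtx.  */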
21268 /* Expand an sse vector comparison. Return the register with the result. */
21270 static rtx
21271 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21272 rtx op_true, rtx op_false)
21274 machine_mode mode = GET_MODE (dest);
21275 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21277   /* In the general case the comparison result type can differ from the operands' type.  */
21278 machine_mode cmp_mode;
21280 /* In AVX512F the result of comparison is an integer mask. */
21281 bool maskcmp = false;
21282 rtx x;
21284 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21286 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21287 gcc_assert (cmp_mode != BLKmode);
21289 maskcmp = true;
21291 else
21292 cmp_mode = cmp_ops_mode;
21295 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21296 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21297 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21299 if (optimize
21300 || reg_overlap_mentioned_p (dest, op_true)
21301 || reg_overlap_mentioned_p (dest, op_false))
21302 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21304 /* Compare patterns for int modes are unspec in AVX512F only. */
21305 if (maskcmp && (code == GT || code == EQ))
21307 rtx (*gen)(rtx, rtx, rtx);
21309 switch (cmp_ops_mode)
21311 case V64QImode:
21312 gcc_assert (TARGET_AVX512BW);
21313 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
21314 break;
21315 case V32HImode:
21316 gcc_assert (TARGET_AVX512BW);
21317 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
21318 break;
21319 case V16SImode:
21320 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21321 break;
21322 case V8DImode:
21323 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21324 break;
21325 default:
21326 gen = NULL;
21329 if (gen)
21331 emit_insn (gen (dest, cmp_op0, cmp_op1));
21332 return dest;
21335 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21337 if (cmp_mode != mode && !maskcmp)
21339 x = force_reg (cmp_ops_mode, x);
21340 convert_move (dest, x, false);
21342 else
21343 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21345 return dest;
21348 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21349 operations. This is used for both scalar and vector conditional moves. */
21351 static void
21352 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21354 machine_mode mode = GET_MODE (dest);
21355 machine_mode cmpmode = GET_MODE (cmp);
21357 /* In AVX512F the result of comparison is an integer mask. */
21358 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21360 rtx t2, t3, x;
21362 if (vector_all_ones_operand (op_true, mode)
21363 && rtx_equal_p (op_false, CONST0_RTX (mode))
21364 && !maskcmp)
21366 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
21368 else if (op_false == CONST0_RTX (mode)
21369 && !maskcmp)
21371 op_true = force_reg (mode, op_true);
21372 x = gen_rtx_AND (mode, cmp, op_true);
21373 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21375 else if (op_true == CONST0_RTX (mode)
21376 && !maskcmp)
21378 op_false = force_reg (mode, op_false);
21379 x = gen_rtx_NOT (mode, cmp);
21380 x = gen_rtx_AND (mode, x, op_false);
21381 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21383 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21384 && !maskcmp)
21386 op_false = force_reg (mode, op_false);
21387 x = gen_rtx_IOR (mode, cmp, op_false);
21388 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21390 else if (TARGET_XOP
21391 && !maskcmp)
21393 op_true = force_reg (mode, op_true);
21395 if (!nonimmediate_operand (op_false, mode))
21396 op_false = force_reg (mode, op_false);
21398 emit_insn (gen_rtx_SET (mode, dest,
21399 gen_rtx_IF_THEN_ELSE (mode, cmp,
21400 op_true,
21401 op_false)));
21403 else
21405 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21406 rtx d = dest;
21408 if (!nonimmediate_operand (op_true, mode))
21409 op_true = force_reg (mode, op_true);
21411 op_false = force_reg (mode, op_false);
21413 switch (mode)
21415 case V4SFmode:
21416 if (TARGET_SSE4_1)
21417 gen = gen_sse4_1_blendvps;
21418 break;
21419 case V2DFmode:
21420 if (TARGET_SSE4_1)
21421 gen = gen_sse4_1_blendvpd;
21422 break;
21423 case V16QImode:
21424 case V8HImode:
21425 case V4SImode:
21426 case V2DImode:
21427 if (TARGET_SSE4_1)
21429 gen = gen_sse4_1_pblendvb;
21430 if (mode != V16QImode)
21431 d = gen_reg_rtx (V16QImode);
21432 op_false = gen_lowpart (V16QImode, op_false);
21433 op_true = gen_lowpart (V16QImode, op_true);
21434 cmp = gen_lowpart (V16QImode, cmp);
21436 break;
21437 case V8SFmode:
21438 if (TARGET_AVX)
21439 gen = gen_avx_blendvps256;
21440 break;
21441 case V4DFmode:
21442 if (TARGET_AVX)
21443 gen = gen_avx_blendvpd256;
21444 break;
21445 case V32QImode:
21446 case V16HImode:
21447 case V8SImode:
21448 case V4DImode:
21449 if (TARGET_AVX2)
21451 gen = gen_avx2_pblendvb;
21452 if (mode != V32QImode)
21453 d = gen_reg_rtx (V32QImode);
21454 op_false = gen_lowpart (V32QImode, op_false);
21455 op_true = gen_lowpart (V32QImode, op_true);
21456 cmp = gen_lowpart (V32QImode, cmp);
21458 break;
21460 case V64QImode:
21461 gen = gen_avx512bw_blendmv64qi;
21462 break;
21463 case V32HImode:
21464 gen = gen_avx512bw_blendmv32hi;
21465 break;
21466 case V16SImode:
21467 gen = gen_avx512f_blendmv16si;
21468 break;
21469 case V8DImode:
21470 gen = gen_avx512f_blendmv8di;
21471 break;
21472 case V8DFmode:
21473 gen = gen_avx512f_blendmv8df;
21474 break;
21475 case V16SFmode:
21476 gen = gen_avx512f_blendmv16sf;
21477 break;
21479 default:
21480 break;
21483 if (gen != NULL)
21485 emit_insn (gen (d, op_false, op_true, cmp));
21486 if (d != dest)
21487 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21489 else
21491 op_true = force_reg (mode, op_true);
21493 t2 = gen_reg_rtx (mode);
21494 if (optimize)
21495 t3 = gen_reg_rtx (mode);
21496 else
21497 t3 = dest;
21499 x = gen_rtx_AND (mode, op_true, cmp);
21500 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21502 x = gen_rtx_NOT (mode, cmp);
21503 x = gen_rtx_AND (mode, x, op_false);
21504 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21506 x = gen_rtx_IOR (mode, t3, t2);
21507 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
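/* C-level sketch of the fallback blend above (illustrative only): with a
   full-width 0/-1 comparison result the select is built purely from logical
   operations,

     dest = (cmp & op_true) | (~cmp & op_false);

   the earlier special cases simply drop one of the two terms when an arm
   is known to be all zeros or all ones.  */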
21512 /* Expand a floating-point conditional move. Return true if successful. */
21514 bool
21515 ix86_expand_fp_movcc (rtx operands[])
21517 machine_mode mode = GET_MODE (operands[0]);
21518 enum rtx_code code = GET_CODE (operands[1]);
21519 rtx tmp, compare_op;
21520 rtx op0 = XEXP (operands[1], 0);
21521 rtx op1 = XEXP (operands[1], 1);
21523 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21525 machine_mode cmode;
21527 /* Since we've no cmove for sse registers, don't force bad register
21528 allocation just to gain access to it. Deny movcc when the
21529 comparison mode doesn't match the move mode. */
21530 cmode = GET_MODE (op0);
21531 if (cmode == VOIDmode)
21532 cmode = GET_MODE (op1);
21533 if (cmode != mode)
21534 return false;
21536 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21537 if (code == UNKNOWN)
21538 return false;
21540 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21541 operands[2], operands[3]))
21542 return true;
21544 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21545 operands[2], operands[3]);
21546 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21547 return true;
21550 if (GET_MODE (op0) == TImode
21551 || (GET_MODE (op0) == DImode
21552 && !TARGET_64BIT))
21553 return false;
21555 /* The floating point conditional move instructions don't directly
21556 support conditions resulting from a signed integer comparison. */
21558 compare_op = ix86_expand_compare (code, op0, op1);
21559 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21561 tmp = gen_reg_rtx (QImode);
21562 ix86_expand_setcc (tmp, code, op0, op1);
21564 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21567 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21568 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21569 operands[2], operands[3])));
21571 return true;
21574 /* Expand a floating-point vector conditional move; a vcond operation
21575 rather than a movcc operation. */
21577 bool
21578 ix86_expand_fp_vcond (rtx operands[])
21580 enum rtx_code code = GET_CODE (operands[3]);
21581 rtx cmp;
21583 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21584 &operands[4], &operands[5]);
21585 if (code == UNKNOWN)
21587 rtx temp;
21588 switch (GET_CODE (operands[3]))
21590 case LTGT:
21591 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21592 operands[5], operands[0], operands[0]);
21593 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21594 operands[5], operands[1], operands[2]);
21595 code = AND;
21596 break;
21597 case UNEQ:
21598 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21599 operands[5], operands[0], operands[0]);
21600 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21601 operands[5], operands[1], operands[2]);
21602 code = IOR;
21603 break;
21604 default:
21605 gcc_unreachable ();
21607 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21608 OPTAB_DIRECT);
21609 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21610 return true;
21613 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21614 operands[5], operands[1], operands[2]))
21615 return true;
21617 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21618 operands[1], operands[2]);
21619 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21620 return true;
21623 /* Expand a signed/unsigned integral vector conditional move. */
21625 bool
21626 ix86_expand_int_vcond (rtx operands[])
21628 machine_mode data_mode = GET_MODE (operands[0]);
21629 machine_mode mode = GET_MODE (operands[4]);
21630 enum rtx_code code = GET_CODE (operands[3]);
21631 bool negate = false;
21632 rtx x, cop0, cop1;
21634 cop0 = operands[4];
21635 cop1 = operands[5];
21637 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21638 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21639 if ((code == LT || code == GE)
21640 && data_mode == mode
21641 && cop1 == CONST0_RTX (mode)
21642 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21643 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21644 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21645 && (GET_MODE_SIZE (data_mode) == 16
21646 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21648 rtx negop = operands[2 - (code == LT)];
21649 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21650 if (negop == CONST1_RTX (data_mode))
21652 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21653 operands[0], 1, OPTAB_DIRECT);
21654 if (res != operands[0])
21655 emit_move_insn (operands[0], res);
21656 return true;
21658 else if (GET_MODE_INNER (data_mode) != DImode
21659 && vector_all_ones_operand (negop, data_mode))
21661 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21662 operands[0], 0, OPTAB_DIRECT);
21663 if (res != operands[0])
21664 emit_move_insn (operands[0], res);
21665 return true;
21669 if (!nonimmediate_operand (cop1, mode))
21670 cop1 = force_reg (mode, cop1);
21671 if (!general_operand (operands[1], data_mode))
21672 operands[1] = force_reg (data_mode, operands[1]);
21673 if (!general_operand (operands[2], data_mode))
21674 operands[2] = force_reg (data_mode, operands[2]);
21676 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21677 if (TARGET_XOP
21678 && (mode == V16QImode || mode == V8HImode
21679 || mode == V4SImode || mode == V2DImode))
21681 else
21683 /* Canonicalize the comparison to EQ, GT, GTU. */
21684 switch (code)
21686 case EQ:
21687 case GT:
21688 case GTU:
21689 break;
21691 case NE:
21692 case LE:
21693 case LEU:
21694 code = reverse_condition (code);
21695 negate = true;
21696 break;
21698 case GE:
21699 case GEU:
21700 code = reverse_condition (code);
21701 negate = true;
21702 /* FALLTHRU */
21704 case LT:
21705 case LTU:
21706 std::swap (cop0, cop1);
21707 code = swap_condition (code);
21708 break;
21710 default:
21711 gcc_unreachable ();
21714 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21715 if (mode == V2DImode)
21717 switch (code)
21719 case EQ:
21720 /* SSE4.1 supports EQ. */
21721 if (!TARGET_SSE4_1)
21722 return false;
21723 break;
21725 case GT:
21726 case GTU:
21727 /* SSE4.2 supports GT/GTU. */
21728 if (!TARGET_SSE4_2)
21729 return false;
21730 break;
21732 default:
21733 gcc_unreachable ();
21737 /* Unsigned parallel compare is not supported by the hardware.
21738 Play some tricks to turn this into a signed comparison
21739 against 0. */
21740 if (code == GTU)
21742 cop0 = force_reg (mode, cop0);
21744 switch (mode)
21746 case V16SImode:
21747 case V8DImode:
21748 case V8SImode:
21749 case V4DImode:
21750 case V4SImode:
21751 case V2DImode:
21753 rtx t1, t2, mask;
21754 rtx (*gen_sub3) (rtx, rtx, rtx);
21756 switch (mode)
21758 case V16SImode: gen_sub3 = gen_subv16si3; break;
21759 case V8DImode: gen_sub3 = gen_subv8di3; break;
21760 case V8SImode: gen_sub3 = gen_subv8si3; break;
21761 case V4DImode: gen_sub3 = gen_subv4di3; break;
21762 case V4SImode: gen_sub3 = gen_subv4si3; break;
21763 case V2DImode: gen_sub3 = gen_subv2di3; break;
21764 default:
21765 gcc_unreachable ();
21767 /* Subtract (-(INT MAX) - 1) from both operands to make
21768 them signed. */
21769 mask = ix86_build_signbit_mask (mode, true, false);
21770 t1 = gen_reg_rtx (mode);
21771 emit_insn (gen_sub3 (t1, cop0, mask));
21773 t2 = gen_reg_rtx (mode);
21774 emit_insn (gen_sub3 (t2, cop1, mask));
21776 cop0 = t1;
21777 cop1 = t2;
21778 code = GT;
21780 break;
21782 case V64QImode:
21783 case V32HImode:
21784 case V32QImode:
21785 case V16HImode:
21786 case V16QImode:
21787 case V8HImode:
21788 /* Perform a parallel unsigned saturating subtraction. */
21789 x = gen_reg_rtx (mode);
21790 emit_insn (gen_rtx_SET (VOIDmode, x,
21791 gen_rtx_US_MINUS (mode, cop0, cop1)));
21793 cop0 = x;
21794 cop1 = CONST0_RTX (mode);
21795 code = EQ;
21796 negate = !negate;
21797 break;
21799 default:
21800 gcc_unreachable ();
21805 /* Allow the comparison to be done in one mode, but the movcc to
21806 happen in another mode. */
21807 if (data_mode == mode)
21809 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21810 operands[1+negate], operands[2-negate]);
21812 else
21814 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21815 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21816 operands[1+negate], operands[2-negate]);
21817 if (GET_MODE (x) == mode)
21818 x = gen_lowpart (data_mode, x);
21821 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21822 operands[2-negate]);
21823 return true;
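/* Illustrative example, not from the original sources, of the GTU handling
   above: SSE has no unsigned element compare, so for dword/qword elements
   both operands are biased by the per-element sign-bit constant, turning

     a >u b   into   (a - 0x80000000) >s (b - 0x80000000)

   while for byte/word elements a saturating subtract is used instead:
   a >u b exactly when (a -us b) != 0, i.e. an EQ against zero with the
   result negated.  */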
21826 /* AVX512F does support 64-byte integer vector operations,
21827 thus the longest vector we are faced with is V64QImode. */
21828 #define MAX_VECT_LEN 64
21830 struct expand_vec_perm_d
21832 rtx target, op0, op1;
21833 unsigned char perm[MAX_VECT_LEN];
21834 machine_mode vmode;
21835 unsigned char nelt;
21836 bool one_operand_p;
21837 bool testing_p;
21840 static bool
21841 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21842 struct expand_vec_perm_d *d)
21844 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21845 expander, so args are either in d, or in op0, op1 etc. */
21846 machine_mode mode = GET_MODE (d ? d->op0 : op0);
21847 machine_mode maskmode = mode;
21848 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21850 switch (mode)
21852 case V8HImode:
21853 if (TARGET_AVX512VL && TARGET_AVX512BW)
21854 gen = gen_avx512vl_vpermi2varv8hi3;
21855 break;
21856 case V16HImode:
21857 if (TARGET_AVX512VL && TARGET_AVX512BW)
21858 gen = gen_avx512vl_vpermi2varv16hi3;
21859 break;
21860 case V64QImode:
21861 if (TARGET_AVX512VBMI)
21862 gen = gen_avx512bw_vpermi2varv64qi3;
21863 break;
21864 case V32HImode:
21865 if (TARGET_AVX512BW)
21866 gen = gen_avx512bw_vpermi2varv32hi3;
21867 break;
21868 case V4SImode:
21869 if (TARGET_AVX512VL)
21870 gen = gen_avx512vl_vpermi2varv4si3;
21871 break;
21872 case V8SImode:
21873 if (TARGET_AVX512VL)
21874 gen = gen_avx512vl_vpermi2varv8si3;
21875 break;
21876 case V16SImode:
21877 if (TARGET_AVX512F)
21878 gen = gen_avx512f_vpermi2varv16si3;
21879 break;
21880 case V4SFmode:
21881 if (TARGET_AVX512VL)
21883 gen = gen_avx512vl_vpermi2varv4sf3;
21884 maskmode = V4SImode;
21886 break;
21887 case V8SFmode:
21888 if (TARGET_AVX512VL)
21890 gen = gen_avx512vl_vpermi2varv8sf3;
21891 maskmode = V8SImode;
21893 break;
21894 case V16SFmode:
21895 if (TARGET_AVX512F)
21897 gen = gen_avx512f_vpermi2varv16sf3;
21898 maskmode = V16SImode;
21900 break;
21901 case V2DImode:
21902 if (TARGET_AVX512VL)
21903 gen = gen_avx512vl_vpermi2varv2di3;
21904 break;
21905 case V4DImode:
21906 if (TARGET_AVX512VL)
21907 gen = gen_avx512vl_vpermi2varv4di3;
21908 break;
21909 case V8DImode:
21910 if (TARGET_AVX512F)
21911 gen = gen_avx512f_vpermi2varv8di3;
21912 break;
21913 case V2DFmode:
21914 if (TARGET_AVX512VL)
21916 gen = gen_avx512vl_vpermi2varv2df3;
21917 maskmode = V2DImode;
21919 break;
21920 case V4DFmode:
21921 if (TARGET_AVX512VL)
21923 gen = gen_avx512vl_vpermi2varv4df3;
21924 maskmode = V4DImode;
21926 break;
21927 case V8DFmode:
21928 if (TARGET_AVX512F)
21930 gen = gen_avx512f_vpermi2varv8df3;
21931 maskmode = V8DImode;
21933 break;
21934 default:
21935 break;
21938 if (gen == NULL)
21939 return false;
21941 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21942 expander, so args are either in d, or in op0, op1 etc. */
21943 if (d)
21945 rtx vec[64];
21946 target = d->target;
21947 op0 = d->op0;
21948 op1 = d->op1;
21949 for (int i = 0; i < d->nelt; ++i)
21950 vec[i] = GEN_INT (d->perm[i]);
21951 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
21954 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
21955 return true;
21958 /* Expand a variable vector permutation. */
21960 void
21961 ix86_expand_vec_perm (rtx operands[])
21963 rtx target = operands[0];
21964 rtx op0 = operands[1];
21965 rtx op1 = operands[2];
21966 rtx mask = operands[3];
21967 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21968 machine_mode mode = GET_MODE (op0);
21969 machine_mode maskmode = GET_MODE (mask);
21970 int w, e, i;
21971 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21973 /* Number of elements in the vector. */
21974 w = GET_MODE_NUNITS (mode);
21975 e = GET_MODE_UNIT_SIZE (mode);
21976 gcc_assert (w <= 64);
21978 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
21979 return;
21981 if (TARGET_AVX2)
21983 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21985 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21986 	     a constant shuffle operand.  With a tiny bit of effort we can
21987 use VPERMD instead. A re-interpretation stall for V4DFmode is
21988 unfortunate but there's no avoiding it.
21989 Similarly for V16HImode we don't have instructions for variable
21990 	     shuffling, while for V32QImode we can, after preparing suitable
21991 	     masks, use vpshufb; vpshufb; vpermq; vpor.  */
21993 if (mode == V16HImode)
21995 maskmode = mode = V32QImode;
21996 w = 32;
21997 e = 1;
21999 else
22001 maskmode = mode = V8SImode;
22002 w = 8;
22003 e = 4;
22005 t1 = gen_reg_rtx (maskmode);
22007 /* Replicate the low bits of the V4DImode mask into V8SImode:
22008 mask = { A B C D }
22009 t1 = { A A B B C C D D }. */
22010 for (i = 0; i < w / 2; ++i)
22011 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
22012 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22013 vt = force_reg (maskmode, vt);
22014 mask = gen_lowpart (maskmode, mask);
22015 if (maskmode == V8SImode)
22016 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22017 else
22018 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22020 	  /* Multiply the shuffle indices by two.  */
22021 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22022 OPTAB_DIRECT);
22024 	  /* Add one to the odd shuffle indices:
22025 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22026 for (i = 0; i < w / 2; ++i)
22028 vec[i * 2] = const0_rtx;
22029 vec[i * 2 + 1] = const1_rtx;
22031 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22032 vt = validize_mem (force_const_mem (maskmode, vt));
22033 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22034 OPTAB_DIRECT);
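	  /* Worked example (illustrative, not from the original sources):
	     a V4DImode element mask { 3 0 2 1 } is replicated above to
	     { 3 3 0 0 2 2 1 1 }, doubled to { 6 6 0 0 4 4 2 2 } and then has
	     { 0 1 0 1 ... } added, giving the V8SImode control
	     { 6 7 0 1 4 5 2 3 } that VPERMD expects.  */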
22036 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22037 operands[3] = mask = t1;
22038 target = gen_reg_rtx (mode);
22039 op0 = gen_lowpart (mode, op0);
22040 op1 = gen_lowpart (mode, op1);
22043 switch (mode)
22045 case V8SImode:
22046 /* The VPERMD and VPERMPS instructions already properly ignore
22047 the high bits of the shuffle elements. No need for us to
22048 perform an AND ourselves. */
22049 if (one_operand_shuffle)
22051 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22052 if (target != operands[0])
22053 emit_move_insn (operands[0],
22054 gen_lowpart (GET_MODE (operands[0]), target));
22056 else
22058 t1 = gen_reg_rtx (V8SImode);
22059 t2 = gen_reg_rtx (V8SImode);
22060 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22061 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22062 goto merge_two;
22064 return;
22066 case V8SFmode:
22067 mask = gen_lowpart (V8SImode, mask);
22068 if (one_operand_shuffle)
22069 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22070 else
22072 t1 = gen_reg_rtx (V8SFmode);
22073 t2 = gen_reg_rtx (V8SFmode);
22074 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22075 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22076 goto merge_two;
22078 return;
22080 case V4SImode:
22081 /* By combining the two 128-bit input vectors into one 256-bit
22082 input vector, we can use VPERMD and VPERMPS for the full
22083 two-operand shuffle. */
22084 t1 = gen_reg_rtx (V8SImode);
22085 t2 = gen_reg_rtx (V8SImode);
22086 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22087 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22088 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22089 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22090 return;
22092 case V4SFmode:
22093 t1 = gen_reg_rtx (V8SFmode);
22094 t2 = gen_reg_rtx (V8SImode);
22095 mask = gen_lowpart (V4SImode, mask);
22096 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22097 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22098 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22099 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
22100 return;
22102 case V32QImode:
22103 t1 = gen_reg_rtx (V32QImode);
22104 t2 = gen_reg_rtx (V32QImode);
22105 t3 = gen_reg_rtx (V32QImode);
22106 vt2 = GEN_INT (-128);
22107 for (i = 0; i < 32; i++)
22108 vec[i] = vt2;
22109 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22110 vt = force_reg (V32QImode, vt);
22111 for (i = 0; i < 32; i++)
22112 vec[i] = i < 16 ? vt2 : const0_rtx;
22113 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22114 vt2 = force_reg (V32QImode, vt2);
22115 /* From mask create two adjusted masks, which contain the same
22116 bits as mask in the low 7 bits of each vector element.
22117 The first mask will have the most significant bit clear
22118 if it requests element from the same 128-bit lane
22119 and MSB set if it requests element from the other 128-bit lane.
22120 The second mask will have the opposite values of the MSB,
22121 and additionally will have its 128-bit lanes swapped.
22122 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22123 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22124 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22125 stands for other 12 bytes. */
22126 /* The bit whether element is from the same lane or the other
22127 lane is bit 4, so shift it up by 3 to the MSB position. */
22128 t5 = gen_reg_rtx (V4DImode);
22129 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22130 GEN_INT (3)));
22131 /* Clear MSB bits from the mask just in case it had them set. */
22132 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22133 /* After this t1 will have MSB set for elements from other lane. */
22134 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22135 /* Clear bits other than MSB. */
22136 emit_insn (gen_andv32qi3 (t1, t1, vt));
22137 /* Or in the lower bits from mask into t3. */
22138 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22139 /* And invert MSB bits in t1, so MSB is set for elements from the same
22140 lane. */
22141 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22142 /* Swap 128-bit lanes in t3. */
22143 t6 = gen_reg_rtx (V4DImode);
22144 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22145 const2_rtx, GEN_INT (3),
22146 const0_rtx, const1_rtx));
22147 /* And or in the lower bits from mask into t1. */
22148 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22149 if (one_operand_shuffle)
22151 /* Each of these shuffles will put 0s in places where
22152 element from the other 128-bit lane is needed, otherwise
22153 will shuffle in the requested value. */
22154 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22155 gen_lowpart (V32QImode, t6)));
22156 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22157 /* For t3 the 128-bit lanes are swapped again. */
22158 t7 = gen_reg_rtx (V4DImode);
22159 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22160 const2_rtx, GEN_INT (3),
22161 const0_rtx, const1_rtx));
22162 /* And oring both together leads to the result. */
22163 emit_insn (gen_iorv32qi3 (target, t1,
22164 gen_lowpart (V32QImode, t7)));
22165 if (target != operands[0])
22166 emit_move_insn (operands[0],
22167 gen_lowpart (GET_MODE (operands[0]), target));
22168 return;
22171 t4 = gen_reg_rtx (V32QImode);
22172 /* Similarly to the above one_operand_shuffle code,
22173 just for repeated twice for each operand. merge_two:
22174 code will merge the two results together. */
22175 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22176 gen_lowpart (V32QImode, t6)));
22177 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22178 gen_lowpart (V32QImode, t6)));
22179 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22180 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22181 t7 = gen_reg_rtx (V4DImode);
22182 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22183 const2_rtx, GEN_INT (3),
22184 const0_rtx, const1_rtx));
22185 t8 = gen_reg_rtx (V4DImode);
22186 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22187 const2_rtx, GEN_INT (3),
22188 const0_rtx, const1_rtx));
22189 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22190 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22191 t1 = t4;
22192 t2 = t3;
22193 goto merge_two;
22195 default:
22196 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22197 break;
22201 if (TARGET_XOP)
22203 /* The XOP VPPERM insn supports three inputs. By ignoring the
22204 one_operand_shuffle special case, we avoid creating another
22205 set of constant vectors in memory. */
22206 one_operand_shuffle = false;
22208 /* mask = mask & {2*w-1, ...} */
22209 vt = GEN_INT (2*w - 1);
22211 else
22213 /* mask = mask & {w-1, ...} */
22214 vt = GEN_INT (w - 1);
22217 for (i = 0; i < w; i++)
22218 vec[i] = vt;
22219 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22220 mask = expand_simple_binop (maskmode, AND, mask, vt,
22221 NULL_RTX, 0, OPTAB_DIRECT);
22223 /* For non-QImode operations, convert the word permutation control
22224 into a byte permutation control. */
22225 if (mode != V16QImode)
22227 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22228 GEN_INT (exact_log2 (e)),
22229 NULL_RTX, 0, OPTAB_DIRECT);
22231 /* Convert mask to vector of chars. */
22232 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22234 /* Replicate each of the input bytes into byte positions:
22235 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22236 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22237 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22238 for (i = 0; i < 16; ++i)
22239 vec[i] = GEN_INT (i/e * e);
22240 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22241 vt = validize_mem (force_const_mem (V16QImode, vt));
22242 if (TARGET_XOP)
22243 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22244 else
22245 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22247 /* Convert it into the byte positions by doing
22248 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22249 for (i = 0; i < 16; ++i)
22250 vec[i] = GEN_INT (i % e);
22251 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22252 vt = validize_mem (force_const_mem (V16QImode, vt));
22253 emit_insn (gen_addv16qi3 (mask, mask, vt));
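      /* Worked example (illustrative, not from the original sources): for a
	 V4SImode shuffle (e == 4) a word index of 2 becomes 8 after the shift
	 by log2 (e), is replicated across the element's four byte slots by the
	 pshufb above, and finally has { 0 1 2 3 } added, yielding the byte
	 control { 8 9 10 11 } that selects the four bytes of source dword 2.  */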
22256 /* The actual shuffle operations all operate on V16QImode. */
22257 op0 = gen_lowpart (V16QImode, op0);
22258 op1 = gen_lowpart (V16QImode, op1);
22260 if (TARGET_XOP)
22262 if (GET_MODE (target) != V16QImode)
22263 target = gen_reg_rtx (V16QImode);
22264 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22265 if (target != operands[0])
22266 emit_move_insn (operands[0],
22267 gen_lowpart (GET_MODE (operands[0]), target));
22269 else if (one_operand_shuffle)
22271 if (GET_MODE (target) != V16QImode)
22272 target = gen_reg_rtx (V16QImode);
22273 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22274 if (target != operands[0])
22275 emit_move_insn (operands[0],
22276 gen_lowpart (GET_MODE (operands[0]), target));
22278 else
22280 rtx xops[6];
22281 bool ok;
22283 /* Shuffle the two input vectors independently. */
22284 t1 = gen_reg_rtx (V16QImode);
22285 t2 = gen_reg_rtx (V16QImode);
22286 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22287 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22289 merge_two:
22290 /* Then merge them together. The key is whether any given control
22291 element contained a bit set that indicates the second word. */
22292 mask = operands[3];
22293 vt = GEN_INT (w);
22294 if (maskmode == V2DImode && !TARGET_SSE4_1)
22296 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
22297 more shuffle to convert the V2DI input mask into a V4SI
22298 	     input mask, at which point the masking that expand_int_vcond
22299 	     performs will work as desired.  */
22300 rtx t3 = gen_reg_rtx (V4SImode);
22301 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22302 const0_rtx, const0_rtx,
22303 const2_rtx, const2_rtx));
22304 mask = t3;
22305 maskmode = V4SImode;
22306 e = w = 4;
22309 for (i = 0; i < w; i++)
22310 vec[i] = vt;
22311 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22312 vt = force_reg (maskmode, vt);
22313 mask = expand_simple_binop (maskmode, AND, mask, vt,
22314 NULL_RTX, 0, OPTAB_DIRECT);
22316 if (GET_MODE (target) != mode)
22317 target = gen_reg_rtx (mode);
22318 xops[0] = target;
22319 xops[1] = gen_lowpart (mode, t2);
22320 xops[2] = gen_lowpart (mode, t1);
22321 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22322 xops[4] = mask;
22323 xops[5] = vt;
22324 ok = ix86_expand_int_vcond (xops);
22325 gcc_assert (ok);
22326 if (target != operands[0])
22327 emit_move_insn (operands[0],
22328 gen_lowpart (GET_MODE (operands[0]), target));
22332 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
22333 true if we should do zero extension, else sign extension. HIGH_P is
22334 true if we want the N/2 high elements, else the low elements. */
22336 void
22337 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22339 machine_mode imode = GET_MODE (src);
22340 rtx tmp;
22342 if (TARGET_SSE4_1)
22344 rtx (*unpack)(rtx, rtx);
22345 rtx (*extract)(rtx, rtx) = NULL;
22346 machine_mode halfmode = BLKmode;
22348 switch (imode)
22350 case V64QImode:
22351 if (unsigned_p)
22352 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22353 else
22354 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22355 halfmode = V32QImode;
22356 extract
22357 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22358 break;
22359 case V32QImode:
22360 if (unsigned_p)
22361 unpack = gen_avx2_zero_extendv16qiv16hi2;
22362 else
22363 unpack = gen_avx2_sign_extendv16qiv16hi2;
22364 halfmode = V16QImode;
22365 extract
22366 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22367 break;
22368 case V32HImode:
22369 if (unsigned_p)
22370 unpack = gen_avx512f_zero_extendv16hiv16si2;
22371 else
22372 unpack = gen_avx512f_sign_extendv16hiv16si2;
22373 halfmode = V16HImode;
22374 extract
22375 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22376 break;
22377 case V16HImode:
22378 if (unsigned_p)
22379 unpack = gen_avx2_zero_extendv8hiv8si2;
22380 else
22381 unpack = gen_avx2_sign_extendv8hiv8si2;
22382 halfmode = V8HImode;
22383 extract
22384 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22385 break;
22386 case V16SImode:
22387 if (unsigned_p)
22388 unpack = gen_avx512f_zero_extendv8siv8di2;
22389 else
22390 unpack = gen_avx512f_sign_extendv8siv8di2;
22391 halfmode = V8SImode;
22392 extract
22393 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22394 break;
22395 case V8SImode:
22396 if (unsigned_p)
22397 unpack = gen_avx2_zero_extendv4siv4di2;
22398 else
22399 unpack = gen_avx2_sign_extendv4siv4di2;
22400 halfmode = V4SImode;
22401 extract
22402 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22403 break;
22404 case V16QImode:
22405 if (unsigned_p)
22406 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22407 else
22408 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22409 break;
22410 case V8HImode:
22411 if (unsigned_p)
22412 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22413 else
22414 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22415 break;
22416 case V4SImode:
22417 if (unsigned_p)
22418 unpack = gen_sse4_1_zero_extendv2siv2di2;
22419 else
22420 unpack = gen_sse4_1_sign_extendv2siv2di2;
22421 break;
22422 default:
22423 gcc_unreachable ();
22426 if (GET_MODE_SIZE (imode) >= 32)
22428 tmp = gen_reg_rtx (halfmode);
22429 emit_insn (extract (tmp, src));
22431 else if (high_p)
22433 /* Shift higher 8 bytes to lower 8 bytes. */
22434 tmp = gen_reg_rtx (V1TImode);
22435 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22436 GEN_INT (64)));
22437 tmp = gen_lowpart (imode, tmp);
22439 else
22440 tmp = src;
22442 emit_insn (unpack (dest, tmp));
22444 else
22446 rtx (*unpack)(rtx, rtx, rtx);
22448 switch (imode)
22450 case V16QImode:
22451 if (high_p)
22452 unpack = gen_vec_interleave_highv16qi;
22453 else
22454 unpack = gen_vec_interleave_lowv16qi;
22455 break;
22456 case V8HImode:
22457 if (high_p)
22458 unpack = gen_vec_interleave_highv8hi;
22459 else
22460 unpack = gen_vec_interleave_lowv8hi;
22461 break;
22462 case V4SImode:
22463 if (high_p)
22464 unpack = gen_vec_interleave_highv4si;
22465 else
22466 unpack = gen_vec_interleave_lowv4si;
22467 break;
22468 default:
22469 gcc_unreachable ();
22472 if (unsigned_p)
22473 tmp = force_reg (imode, CONST0_RTX (imode));
22474 else
22475 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22476 src, pc_rtx, pc_rtx);
22478 rtx tmp2 = gen_reg_rtx (imode);
22479 emit_insn (unpack (tmp2, src, tmp));
22480 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
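/* Illustrative sketch, not from the original sources, of the pre-SSE4.1
   path above: the widening is done by interleaving SRC with a vector of
   "high halves": all zeros for a zero extension, or a per-element copy of
   the sign (computed as 0 > src) for a sign extension.  E.g. for V8HImode,
   interleaving the low halves of { x0 x1 x2 x3 ... } with the sign vector
   gives { x0 s0 x1 s1 x2 s2 x3 s3 }, i.e. the low four elements widened to
   SImode.  */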
22484 /* Expand conditional increment or decrement using adc/sbb instructions.
22485 The default case using setcc followed by the conditional move can be
22486 done by generic code. */
22487 bool
22488 ix86_expand_int_addcc (rtx operands[])
22490 enum rtx_code code = GET_CODE (operands[1]);
22491 rtx flags;
22492 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22493 rtx compare_op;
22494 rtx val = const0_rtx;
22495 bool fpcmp = false;
22496 machine_mode mode;
22497 rtx op0 = XEXP (operands[1], 0);
22498 rtx op1 = XEXP (operands[1], 1);
22500 if (operands[3] != const1_rtx
22501 && operands[3] != constm1_rtx)
22502 return false;
22503 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22504 return false;
22505 code = GET_CODE (compare_op);
22507 flags = XEXP (compare_op, 0);
22509 if (GET_MODE (flags) == CCFPmode
22510 || GET_MODE (flags) == CCFPUmode)
22512 fpcmp = true;
22513 code = ix86_fp_compare_code_to_integer (code);
22516 if (code != LTU)
22518 val = constm1_rtx;
22519 if (fpcmp)
22520 PUT_CODE (compare_op,
22521 reverse_condition_maybe_unordered
22522 (GET_CODE (compare_op)));
22523 else
22524 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22527 mode = GET_MODE (operands[0]);
22529 /* Construct either adc or sbb insn. */
22530 if ((code == LTU) == (operands[3] == constm1_rtx))
22532 switch (mode)
22534 case QImode:
22535 insn = gen_subqi3_carry;
22536 break;
22537 case HImode:
22538 insn = gen_subhi3_carry;
22539 break;
22540 case SImode:
22541 insn = gen_subsi3_carry;
22542 break;
22543 case DImode:
22544 insn = gen_subdi3_carry;
22545 break;
22546 default:
22547 gcc_unreachable ();
22550 else
22552 switch (mode)
22554 case QImode:
22555 insn = gen_addqi3_carry;
22556 break;
22557 case HImode:
22558 insn = gen_addhi3_carry;
22559 break;
22560 case SImode:
22561 insn = gen_addsi3_carry;
22562 break;
22563 case DImode:
22564 insn = gen_adddi3_carry;
22565 break;
22566 default:
22567 gcc_unreachable ();
22570 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22572 return true;
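/* Illustrative example, not from the original sources: a conditional
   increment such as

     x = (a < b) ? x + 1 : x;

   is implemented here by letting the comparison set the carry flag and then
   emitting "adc $0, x", so the carry is simply added in; the decrement and
   reversed-condition cases use sbb or a constant -1 addend in the same way.  */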
22576 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22577    but works for floating point parameters and nonoffsettable memories.
22578 For pushes, it returns just stack offsets; the values will be saved
22579    in the right order.  Maximally four parts are generated.  */
22581 static int
22582 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22584 int size;
22586 if (!TARGET_64BIT)
22587 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22588 else
22589 size = (GET_MODE_SIZE (mode) + 4) / 8;
22591 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22592 gcc_assert (size >= 2 && size <= 4);
22594 /* Optimize constant pool reference to immediates. This is used by fp
22595      moves that force all constants to memory to allow combining.  */
22596 if (MEM_P (operand) && MEM_READONLY_P (operand))
22598 rtx tmp = maybe_get_pool_constant (operand);
22599 if (tmp)
22600 operand = tmp;
22603 if (MEM_P (operand) && !offsettable_memref_p (operand))
22605       /* The only non-offsettable memories we handle are pushes.  */
22606 int ok = push_operand (operand, VOIDmode);
22608 gcc_assert (ok);
22610 operand = copy_rtx (operand);
22611 PUT_MODE (operand, word_mode);
22612 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22613 return size;
22616 if (GET_CODE (operand) == CONST_VECTOR)
22618 machine_mode imode = int_mode_for_mode (mode);
22619 /* Caution: if we looked through a constant pool memory above,
22620 the operand may actually have a different mode now. That's
22621 ok, since we want to pun this all the way back to an integer. */
22622 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22623 gcc_assert (operand != NULL);
22624 mode = imode;
22627 if (!TARGET_64BIT)
22629 if (mode == DImode)
22630 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22631 else
22633 int i;
22635 if (REG_P (operand))
22637 gcc_assert (reload_completed);
22638 for (i = 0; i < size; i++)
22639 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22641 else if (offsettable_memref_p (operand))
22643 operand = adjust_address (operand, SImode, 0);
22644 parts[0] = operand;
22645 for (i = 1; i < size; i++)
22646 parts[i] = adjust_address (operand, SImode, 4 * i);
22648 else if (CONST_DOUBLE_P (operand))
22650 REAL_VALUE_TYPE r;
22651 long l[4];
22653 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22654 switch (mode)
22656 case TFmode:
22657 real_to_target (l, &r, mode);
22658 parts[3] = gen_int_mode (l[3], SImode);
22659 parts[2] = gen_int_mode (l[2], SImode);
22660 break;
22661 case XFmode:
22662 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22663 long double may not be 80-bit. */
22664 real_to_target (l, &r, mode);
22665 parts[2] = gen_int_mode (l[2], SImode);
22666 break;
22667 case DFmode:
22668 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22669 break;
22670 default:
22671 gcc_unreachable ();
22673 parts[1] = gen_int_mode (l[1], SImode);
22674 parts[0] = gen_int_mode (l[0], SImode);
22676 else
22677 gcc_unreachable ();
22680 else
22682 if (mode == TImode)
22683 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22684 if (mode == XFmode || mode == TFmode)
22686 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22687 if (REG_P (operand))
22689 gcc_assert (reload_completed);
22690 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22691 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22693 else if (offsettable_memref_p (operand))
22695 operand = adjust_address (operand, DImode, 0);
22696 parts[0] = operand;
22697 parts[1] = adjust_address (operand, upper_mode, 8);
22699 else if (CONST_DOUBLE_P (operand))
22701 REAL_VALUE_TYPE r;
22702 long l[4];
22704 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22705 real_to_target (l, &r, mode);
22707 /* real_to_target puts 32-bit pieces in each long. */
22708 parts[0] =
22709 gen_int_mode
22710 ((l[0] & (HOST_WIDE_INT) 0xffffffff)
22711 | ((l[1] & (HOST_WIDE_INT) 0xffffffff) << 32),
22712 DImode);
22714 if (upper_mode == SImode)
22715 parts[1] = gen_int_mode (l[2], SImode);
22716 else
22717 parts[1] =
22718 gen_int_mode
22719 ((l[2] & (HOST_WIDE_INT) 0xffffffff)
22720 | ((l[3] & (HOST_WIDE_INT) 0xffffffff) << 32),
22721 DImode);
22723 else
22724 gcc_unreachable ();
22728 return size;
22731 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22732 All required insns are emitted by this function itself; nothing is
22733 returned. Operands 2-5 and 6-9 are used as scratch for the
22734 destination and source word-sized parts, in copy order. */
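/* For example, on a 32-bit target a DImode register move is split into
two SImode moves; ix86_split_to_parts computes the word-sized pieces and
the code below picks a copy order (possibly reversed, or via an lea of
the source address) so that no source part or source address register is
clobbered before it has been read.  */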
22736 void
22737 ix86_split_long_move (rtx operands[])
22739 rtx part[2][4];
22740 int nparts, i, j;
22741 int push = 0;
22742 int collisions = 0;
22743 machine_mode mode = GET_MODE (operands[0]);
22744 bool collisionparts[4];
22746 /* The DFmode expanders may ask us to move a double.
22747 For a 64-bit target this is a single move. By hiding the fact
22748 here we simplify i386.md splitters. */
22749 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22751 /* Optimize constant pool reference to immediates. This is used by
22752 fp moves that force all constants to memory to allow combining. */
22754 if (MEM_P (operands[1])
22755 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22756 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22757 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22758 if (push_operand (operands[0], VOIDmode))
22760 operands[0] = copy_rtx (operands[0]);
22761 PUT_MODE (operands[0], word_mode);
22763 else
22764 operands[0] = gen_lowpart (DImode, operands[0]);
22765 operands[1] = gen_lowpart (DImode, operands[1]);
22766 emit_move_insn (operands[0], operands[1]);
22767 return;
22770 /* The only non-offsettable memory we handle is push. */
22771 if (push_operand (operands[0], VOIDmode))
22772 push = 1;
22773 else
22774 gcc_assert (!MEM_P (operands[0])
22775 || offsettable_memref_p (operands[0]));
22777 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22778 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22780 /* When emitting push, take care for source operands on the stack. */
22781 if (push && MEM_P (operands[1])
22782 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22784 rtx src_base = XEXP (part[1][nparts - 1], 0);
22786 /* Compensate for the stack decrement by 4. */
22787 if (!TARGET_64BIT && nparts == 3
22788 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22789 src_base = plus_constant (Pmode, src_base, 4);
22791 /* src_base refers to the stack pointer and is
22792 automatically decreased by emitted push. */
22793 for (i = 0; i < nparts; i++)
22794 part[1][i] = change_address (part[1][i],
22795 GET_MODE (part[1][i]), src_base);
22798 /* We need to do copy in the right order in case an address register
22799 of the source overlaps the destination. */
22800 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22802 rtx tmp;
22804 for (i = 0; i < nparts; i++)
22806 collisionparts[i]
22807 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22808 if (collisionparts[i])
22809 collisions++;
22812 /* Collision in the middle part can be handled by reordering. */
22813 if (collisions == 1 && nparts == 3 && collisionparts [1])
22815 std::swap (part[0][1], part[0][2]);
22816 std::swap (part[1][1], part[1][2]);
22818 else if (collisions == 1
22819 && nparts == 4
22820 && (collisionparts [1] || collisionparts [2]))
22822 if (collisionparts [1])
22824 std::swap (part[0][1], part[0][2]);
22825 std::swap (part[1][1], part[1][2]);
22827 else
22829 std::swap (part[0][2], part[0][3]);
22830 std::swap (part[1][2], part[1][3]);
22834 /* If there are more collisions, we can't handle it by reordering.
22835 Do an lea to the last part and use only one colliding move. */
22836 else if (collisions > 1)
22838 rtx base;
22840 collisions = 1;
22842 base = part[0][nparts - 1];
22844 /* Handle the case when the last part isn't valid for lea.
22845 Happens in 64-bit mode storing the 12-byte XFmode. */
22846 if (GET_MODE (base) != Pmode)
22847 base = gen_rtx_REG (Pmode, REGNO (base));
22849 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22850 part[1][0] = replace_equiv_address (part[1][0], base);
22851 for (i = 1; i < nparts; i++)
22853 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22854 part[1][i] = replace_equiv_address (part[1][i], tmp);
22859 if (push)
22861 if (!TARGET_64BIT)
22863 if (nparts == 3)
22865 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22866 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22867 stack_pointer_rtx, GEN_INT (-4)));
22868 emit_move_insn (part[0][2], part[1][2]);
22870 else if (nparts == 4)
22872 emit_move_insn (part[0][3], part[1][3]);
22873 emit_move_insn (part[0][2], part[1][2]);
22876 else
22878 /* In 64-bit mode we don't have a 32-bit push available. In case this is
22879 a register, that is OK - we will just use the larger counterpart. We also
22880 retype memory - this comes from an attempt to avoid the REX prefix on
22881 moving the second half of a TFmode value. */
22882 if (GET_MODE (part[1][1]) == SImode)
22884 switch (GET_CODE (part[1][1]))
22886 case MEM:
22887 part[1][1] = adjust_address (part[1][1], DImode, 0);
22888 break;
22890 case REG:
22891 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22892 break;
22894 default:
22895 gcc_unreachable ();
22898 if (GET_MODE (part[1][0]) == SImode)
22899 part[1][0] = part[1][1];
22902 emit_move_insn (part[0][1], part[1][1]);
22903 emit_move_insn (part[0][0], part[1][0]);
22904 return;
22907 /* Choose correct order to not overwrite the source before it is copied. */
22908 if ((REG_P (part[0][0])
22909 && REG_P (part[1][1])
22910 && (REGNO (part[0][0]) == REGNO (part[1][1])
22911 || (nparts == 3
22912 && REGNO (part[0][0]) == REGNO (part[1][2]))
22913 || (nparts == 4
22914 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22915 || (collisions > 0
22916 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22918 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22920 operands[2 + i] = part[0][j];
22921 operands[6 + i] = part[1][j];
22924 else
22926 for (i = 0; i < nparts; i++)
22928 operands[2 + i] = part[0][i];
22929 operands[6 + i] = part[1][i];
22933 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22934 if (optimize_insn_for_size_p ())
22936 for (j = 0; j < nparts - 1; j++)
22937 if (CONST_INT_P (operands[6 + j])
22938 && operands[6 + j] != const0_rtx
22939 && REG_P (operands[2 + j]))
22940 for (i = j; i < nparts - 1; i++)
22941 if (CONST_INT_P (operands[7 + i])
22942 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22943 operands[7 + i] = operands[2 + j];
22946 for (i = 0; i < nparts; i++)
22947 emit_move_insn (operands[2 + i], operands[6 + i]);
22949 return;
22952 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22953 left shift by a constant, either using a single shift or
22954 a sequence of add instructions. */
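/* For instance, a constant shift left by 2 may come out as two
self-additions (operand += operand, twice) when two adds cost no more
than one shift by a constant and we are not optimizing for size;
otherwise a single shift insn is emitted.  */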
22956 static void
22957 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
22959 rtx (*insn)(rtx, rtx, rtx);
22961 if (count == 1
22962 || (count * ix86_cost->add <= ix86_cost->shift_const
22963 && !optimize_insn_for_size_p ()))
22965 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22966 while (count-- > 0)
22967 emit_insn (insn (operand, operand, operand));
22969 else
22971 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22972 emit_insn (insn (operand, operand, GEN_INT (count)));
22976 void
22977 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
22979 rtx (*gen_ashl3)(rtx, rtx, rtx);
22980 rtx (*gen_shld)(rtx, rtx, rtx);
22981 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22983 rtx low[2], high[2];
22984 int count;
22986 if (CONST_INT_P (operands[2]))
22988 split_double_mode (mode, operands, 2, low, high);
22989 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22991 if (count >= half_width)
22993 emit_move_insn (high[0], low[1]);
22994 emit_move_insn (low[0], const0_rtx);
22996 if (count > half_width)
22997 ix86_expand_ashl_const (high[0], count - half_width, mode);
22999 else
23001 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23003 if (!rtx_equal_p (operands[0], operands[1]))
23004 emit_move_insn (operands[0], operands[1]);
23006 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
23007 ix86_expand_ashl_const (low[0], count, mode);
23009 return;
23012 split_double_mode (mode, operands, 1, low, high);
23014 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23016 if (operands[1] == const1_rtx)
23018 /* Assuming we've chosen QImode capable registers, then 1 << N
23019 can be done with two 32/64-bit shifts, no branches, no cmoves. */
23020 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23022 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23024 ix86_expand_clear (low[0]);
23025 ix86_expand_clear (high[0]);
23026 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23028 d = gen_lowpart (QImode, low[0]);
23029 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23030 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23031 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23033 d = gen_lowpart (QImode, high[0]);
23034 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23035 s = gen_rtx_NE (QImode, flags, const0_rtx);
23036 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23039 /* Otherwise, we can get the same results by manually performing
23040 a bit extract operation on bit 5/6, and then performing the two
23041 shifts. The two methods of getting 0/1 into low/high are exactly
23042 the same size. Avoiding the shift in the bit extract case helps
23043 pentium4 a bit; no one else seems to care much either way. */
23044 else
23046 machine_mode half_mode;
23047 rtx (*gen_lshr3)(rtx, rtx, rtx);
23048 rtx (*gen_and3)(rtx, rtx, rtx);
23049 rtx (*gen_xor3)(rtx, rtx, rtx);
23050 HOST_WIDE_INT bits;
23051 rtx x;
23053 if (mode == DImode)
23055 half_mode = SImode;
23056 gen_lshr3 = gen_lshrsi3;
23057 gen_and3 = gen_andsi3;
23058 gen_xor3 = gen_xorsi3;
23059 bits = 5;
23061 else
23063 half_mode = DImode;
23064 gen_lshr3 = gen_lshrdi3;
23065 gen_and3 = gen_anddi3;
23066 gen_xor3 = gen_xordi3;
23067 bits = 6;
23070 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23071 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23072 else
23073 x = gen_lowpart (half_mode, operands[2]);
23074 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
23076 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23077 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23078 emit_move_insn (low[0], high[0]);
23079 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23082 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23083 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23084 return;
23087 if (operands[1] == constm1_rtx)
23089 /* For -1 << N, we can avoid the shld instruction, because we
23090 know that we're shifting 0...31/63 ones into a -1. */
23091 emit_move_insn (low[0], constm1_rtx);
23092 if (optimize_insn_for_size_p ())
23093 emit_move_insn (high[0], low[0]);
23094 else
23095 emit_move_insn (high[0], constm1_rtx);
23097 else
23099 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23101 if (!rtx_equal_p (operands[0], operands[1]))
23102 emit_move_insn (operands[0], operands[1]);
23104 split_double_mode (mode, operands, 1, low, high);
23105 emit_insn (gen_shld (high[0], low[0], operands[2]));
23108 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23110 if (TARGET_CMOVE && scratch)
23112 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23113 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23115 ix86_expand_clear (scratch);
23116 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23118 else
23120 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23121 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23123 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
23127 void
23128 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23130 rtx (*gen_ashr3)(rtx, rtx, rtx)
23131 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23132 rtx (*gen_shrd)(rtx, rtx, rtx);
23133 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23135 rtx low[2], high[2];
23136 int count;
23138 if (CONST_INT_P (operands[2]))
23140 split_double_mode (mode, operands, 2, low, high);
23141 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23143 if (count == GET_MODE_BITSIZE (mode) - 1)
23145 emit_move_insn (high[0], high[1]);
23146 emit_insn (gen_ashr3 (high[0], high[0],
23147 GEN_INT (half_width - 1)));
23148 emit_move_insn (low[0], high[0]);
23151 else if (count >= half_width)
23153 emit_move_insn (low[0], high[1]);
23154 emit_move_insn (high[0], low[0]);
23155 emit_insn (gen_ashr3 (high[0], high[0],
23156 GEN_INT (half_width - 1)));
23158 if (count > half_width)
23159 emit_insn (gen_ashr3 (low[0], low[0],
23160 GEN_INT (count - half_width)));
23162 else
23164 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23166 if (!rtx_equal_p (operands[0], operands[1]))
23167 emit_move_insn (operands[0], operands[1]);
23169 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23170 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23173 else
23175 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23177 if (!rtx_equal_p (operands[0], operands[1]))
23178 emit_move_insn (operands[0], operands[1]);
23180 split_double_mode (mode, operands, 1, low, high);
23182 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23183 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23185 if (TARGET_CMOVE && scratch)
23187 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23188 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23190 emit_move_insn (scratch, high[0]);
23191 emit_insn (gen_ashr3 (scratch, scratch,
23192 GEN_INT (half_width - 1)));
23193 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23194 scratch));
23196 else
23198 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23199 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23201 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
23206 void
23207 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23209 rtx (*gen_lshr3)(rtx, rtx, rtx)
23210 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23211 rtx (*gen_shrd)(rtx, rtx, rtx);
23212 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23214 rtx low[2], high[2];
23215 int count;
23217 if (CONST_INT_P (operands[2]))
23219 split_double_mode (mode, operands, 2, low, high);
23220 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23222 if (count >= half_width)
23224 emit_move_insn (low[0], high[1]);
23225 ix86_expand_clear (high[0]);
23227 if (count > half_width)
23228 emit_insn (gen_lshr3 (low[0], low[0],
23229 GEN_INT (count - half_width)));
23231 else
23233 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23235 if (!rtx_equal_p (operands[0], operands[1]))
23236 emit_move_insn (operands[0], operands[1]);
23238 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23239 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23242 else
23244 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23246 if (!rtx_equal_p (operands[0], operands[1]))
23247 emit_move_insn (operands[0], operands[1]);
23249 split_double_mode (mode, operands, 1, low, high);
23251 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23252 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23254 if (TARGET_CMOVE && scratch)
23256 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23257 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23259 ix86_expand_clear (scratch);
23260 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23261 scratch));
23263 else
23265 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23266 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23268 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23273 /* Predict just emitted jump instruction to be taken with probability PROB. */
23274 static void
23275 predict_jump (int prob)
23277 rtx insn = get_last_insn ();
23278 gcc_assert (JUMP_P (insn));
23279 add_int_reg_note (insn, REG_BR_PROB, prob);
23282 /* Helper function for the string operations below. Test whether VARIABLE
23283 is aligned to VALUE bytes. If so, jump to the returned label. */
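/* E.g. ix86_expand_aligntest (count, 4, true) masks COUNT with 4 and
branches to the returned label when that bit is clear, so the code the
caller emits next runs only when the bit of COUNT is set; the caller is
responsible for emitting the label (and setting LABEL_NUSES) afterwards.  */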
23284 static rtx_code_label *
23285 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23287 rtx_code_label *label = gen_label_rtx ();
23288 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23289 if (GET_MODE (variable) == DImode)
23290 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23291 else
23292 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23293 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23294 1, label);
23295 if (epilogue)
23296 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23297 else
23298 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23299 return label;
23302 /* Adjust COUNTER by the VALUE. */
23303 static void
23304 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23306 rtx (*gen_add)(rtx, rtx, rtx)
23307 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23309 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23312 /* Zero extend possibly SImode EXP to Pmode register. */
23314 ix86_zero_extend_to_Pmode (rtx exp)
23316 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23319 /* Divide COUNTREG by SCALE. */
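/* E.g. scale_counter (count, 4) yields COUNT / 4: a new CONST_INT for a
constant count, or a logical right shift by 2 for a register count.  */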
23320 static rtx
23321 scale_counter (rtx countreg, int scale)
23323 rtx sc;
23325 if (scale == 1)
23326 return countreg;
23327 if (CONST_INT_P (countreg))
23328 return GEN_INT (INTVAL (countreg) / scale);
23329 gcc_assert (REG_P (countreg));
23331 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23332 GEN_INT (exact_log2 (scale)),
23333 NULL, 1, OPTAB_DIRECT);
23334 return sc;
23337 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23338 DImode for constant loop counts. */
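/* E.g. a constant count of 4096 is counted in SImode even on 64-bit
targets; only constants with bits set above the low 32 bits force DImode,
while a register count simply keeps its own mode.  */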
23340 static machine_mode
23341 counter_mode (rtx count_exp)
23343 if (GET_MODE (count_exp) != VOIDmode)
23344 return GET_MODE (count_exp);
23345 if (!CONST_INT_P (count_exp))
23346 return Pmode;
23347 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23348 return DImode;
23349 return SImode;
23352 /* Copy the address to a Pmode register. This is used for x32 to
23353 truncate DImode TLS address to a SImode register. */
23355 static rtx
23356 ix86_copy_addr_to_reg (rtx addr)
23358 rtx reg;
23359 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23361 reg = copy_addr_to_reg (addr);
23362 REG_POINTER (reg) = 1;
23363 return reg;
23365 else
23367 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23368 reg = copy_to_mode_reg (DImode, addr);
23369 REG_POINTER (reg) = 1;
23370 return gen_rtx_SUBREG (SImode, reg, 0);
23374 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by
23375 SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the overall size is COUNT
23376 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
23377 memory by VALUE (supposed to be in MODE).
23379 The size is rounded down to whole number of chunk size moved at once.
23380 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
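/* Roughly, the loop emitted below has this shape (the initial jump to
OUT when nothing is left exists only for byte-sized chunks):

     size = COUNT & -(MODE_SIZE * UNROLL);
     iter = 0;
   top:
     <UNROLL stores of VALUE (or copies from SRCMEM) of MODE at offset iter>
     iter += MODE_SIZE * UNROLL;
     if (iter < size) goto top;
     DESTPTR += iter;   (and SRCPTR += iter when copying)
   out:  */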
23383 static void
23384 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23385 rtx destptr, rtx srcptr, rtx value,
23386 rtx count, machine_mode mode, int unroll,
23387 int expected_size, bool issetmem)
23389 rtx_code_label *out_label, *top_label;
23390 rtx iter, tmp;
23391 machine_mode iter_mode = counter_mode (count);
23392 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23393 rtx piece_size = GEN_INT (piece_size_n);
23394 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23395 rtx size;
23396 int i;
23398 top_label = gen_label_rtx ();
23399 out_label = gen_label_rtx ();
23400 iter = gen_reg_rtx (iter_mode);
23402 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23403 NULL, 1, OPTAB_DIRECT);
23404 /* Those two should combine. */
23405 if (piece_size == const1_rtx)
23407 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23408 true, out_label);
23409 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23411 emit_move_insn (iter, const0_rtx);
23413 emit_label (top_label);
23415 tmp = convert_modes (Pmode, iter_mode, iter, true);
23417 /* This assert could be relaxed - in this case we'll need to compute
23418 the smallest power of two containing PIECE_SIZE_N and pass it to
23419 offset_address. */
23420 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23421 destmem = offset_address (destmem, tmp, piece_size_n);
23422 destmem = adjust_address (destmem, mode, 0);
23424 if (!issetmem)
23426 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23427 srcmem = adjust_address (srcmem, mode, 0);
23429 /* When unrolling for chips that reorder memory reads and writes,
23430 we can save registers by using a single temporary.
23431 Also, using 4 temporaries is overkill in 32-bit mode. */
23432 if (!TARGET_64BIT && 0)
23434 for (i = 0; i < unroll; i++)
23436 if (i)
23438 destmem =
23439 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23440 srcmem =
23441 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23443 emit_move_insn (destmem, srcmem);
23446 else
23448 rtx tmpreg[4];
23449 gcc_assert (unroll <= 4);
23450 for (i = 0; i < unroll; i++)
23452 tmpreg[i] = gen_reg_rtx (mode);
23453 if (i)
23455 srcmem =
23456 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23458 emit_move_insn (tmpreg[i], srcmem);
23460 for (i = 0; i < unroll; i++)
23462 if (i)
23464 destmem =
23465 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23467 emit_move_insn (destmem, tmpreg[i]);
23471 else
23472 for (i = 0; i < unroll; i++)
23474 if (i)
23475 destmem =
23476 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23477 emit_move_insn (destmem, value);
23480 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23481 true, OPTAB_LIB_WIDEN);
23482 if (tmp != iter)
23483 emit_move_insn (iter, tmp);
23485 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23486 true, top_label);
23487 if (expected_size != -1)
23489 expected_size /= GET_MODE_SIZE (mode) * unroll;
23490 if (expected_size == 0)
23491 predict_jump (0);
23492 else if (expected_size > REG_BR_PROB_BASE)
23493 predict_jump (REG_BR_PROB_BASE - 1);
23494 else
23495 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23497 else
23498 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23499 iter = ix86_zero_extend_to_Pmode (iter);
23500 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23501 true, OPTAB_LIB_WIDEN);
23502 if (tmp != destptr)
23503 emit_move_insn (destptr, tmp);
23504 if (!issetmem)
23506 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23507 true, OPTAB_LIB_WIDEN);
23508 if (tmp != srcptr)
23509 emit_move_insn (srcptr, tmp);
23511 emit_label (out_label);
23514 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23515 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23516 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23517 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23518 ORIG_VALUE is the original value passed to memset to fill the memory with.
23519 Other arguments have same meaning as for previous function. */
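/* For instance, a memset of a constant byte count that is a multiple of 4
with a zero fill value is widened from QImode to SImode here, so the
expansion is in effect a "rep stosl" executing COUNT/4 times rather than
a byte-wise "rep stosb".  */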
23521 static void
23522 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23523 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23524 rtx count,
23525 machine_mode mode, bool issetmem)
23527 rtx destexp;
23528 rtx srcexp;
23529 rtx countreg;
23530 HOST_WIDE_INT rounded_count;
23532 /* If possible, it is shorter to use rep movs.
23533 TODO: Maybe it is better to move this logic to decide_alg. */
23534 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23535 && (!issetmem || orig_value == const0_rtx))
23536 mode = SImode;
23538 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23539 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23541 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23542 GET_MODE_SIZE (mode)));
23543 if (mode != QImode)
23545 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23546 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23547 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23549 else
23550 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23551 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23553 rounded_count = (INTVAL (count)
23554 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23555 destmem = shallow_copy_rtx (destmem);
23556 set_mem_size (destmem, rounded_count);
23558 else if (MEM_SIZE_KNOWN_P (destmem))
23559 clear_mem_size (destmem);
23561 if (issetmem)
23563 value = force_reg (mode, gen_lowpart (mode, value));
23564 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23566 else
23568 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23569 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23570 if (mode != QImode)
23572 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23573 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23574 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23576 else
23577 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23578 if (CONST_INT_P (count))
23580 rounded_count = (INTVAL (count)
23581 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23582 srcmem = shallow_copy_rtx (srcmem);
23583 set_mem_size (srcmem, rounded_count);
23585 else
23587 if (MEM_SIZE_KNOWN_P (srcmem))
23588 clear_mem_size (srcmem);
23590 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23591 destexp, srcexp));
23595 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23596 DESTMEM.
23597 SRC is passed by pointer to be updated on return.
23598 Return value is updated DST. */
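/* E.g. with SIZE_TO_MOVE == 8 on a 64-bit target a single DImode
temporary is used: one load from the source, one store to the
destination, and both DESTPTR and SRCPTR are advanced by 8.  */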
23599 static rtx
23600 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23601 HOST_WIDE_INT size_to_move)
23603 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23604 enum insn_code code;
23605 machine_mode move_mode;
23606 int piece_size, i;
23608 /* Find the widest mode in which we could perform moves.
23609 Start with the biggest power of 2 not larger than SIZE_TO_MOVE and halve
23610 it until a move of such size is supported. */
23611 piece_size = 1 << floor_log2 (size_to_move);
23612 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23613 code = optab_handler (mov_optab, move_mode);
23614 while (code == CODE_FOR_nothing && piece_size > 1)
23616 piece_size >>= 1;
23617 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23618 code = optab_handler (mov_optab, move_mode);
23621 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23622 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23623 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23625 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23626 move_mode = mode_for_vector (word_mode, nunits);
23627 code = optab_handler (mov_optab, move_mode);
23628 if (code == CODE_FOR_nothing)
23630 move_mode = word_mode;
23631 piece_size = GET_MODE_SIZE (move_mode);
23632 code = optab_handler (mov_optab, move_mode);
23635 gcc_assert (code != CODE_FOR_nothing);
23637 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23638 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23640 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23641 gcc_assert (size_to_move % piece_size == 0);
23642 adjust = GEN_INT (piece_size);
23643 for (i = 0; i < size_to_move; i += piece_size)
23645 /* We move from memory to memory, so we'll need to do it via
23646 a temporary register. */
23647 tempreg = gen_reg_rtx (move_mode);
23648 emit_insn (GEN_FCN (code) (tempreg, src));
23649 emit_insn (GEN_FCN (code) (dst, tempreg));
23651 emit_move_insn (destptr,
23652 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23653 emit_move_insn (srcptr,
23654 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23656 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23657 piece_size);
23658 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23659 piece_size);
23662 /* Update DST and SRC rtx. */
23663 *srcmem = src;
23664 return dst;
23667 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
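/* For a constant COUNT of 23 and MAX_SIZE of 16 the epilogue size is
23 % 16 == 7, so the constant path below emits one 4-byte, one 2-byte and
one 1-byte move via emit_memmov.  */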
23668 static void
23669 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23670 rtx destptr, rtx srcptr, rtx count, int max_size)
23672 rtx src, dest;
23673 if (CONST_INT_P (count))
23675 HOST_WIDE_INT countval = INTVAL (count);
23676 HOST_WIDE_INT epilogue_size = countval % max_size;
23677 int i;
23679 /* For now MAX_SIZE should be a power of 2. This assert could be
23680 relaxed, but it'll require a bit more complicated epilogue
23681 expanding. */
23682 gcc_assert ((max_size & (max_size - 1)) == 0);
23683 for (i = max_size; i >= 1; i >>= 1)
23685 if (epilogue_size & i)
23686 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23688 return;
23690 if (max_size > 8)
23692 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23693 count, 1, OPTAB_DIRECT);
23694 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23695 count, QImode, 1, 4, false);
23696 return;
23699 /* When there are stringops, we can cheaply increase dest and src pointers.
23700 Otherwise we save code size by maintaining offset (zero is readily
23701 available from the preceding rep operation) and using x86 addressing modes. */
23703 if (TARGET_SINGLE_STRINGOP)
23705 if (max_size > 4)
23707 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23708 src = change_address (srcmem, SImode, srcptr);
23709 dest = change_address (destmem, SImode, destptr);
23710 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23711 emit_label (label);
23712 LABEL_NUSES (label) = 1;
23714 if (max_size > 2)
23716 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23717 src = change_address (srcmem, HImode, srcptr);
23718 dest = change_address (destmem, HImode, destptr);
23719 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23720 emit_label (label);
23721 LABEL_NUSES (label) = 1;
23723 if (max_size > 1)
23725 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23726 src = change_address (srcmem, QImode, srcptr);
23727 dest = change_address (destmem, QImode, destptr);
23728 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23729 emit_label (label);
23730 LABEL_NUSES (label) = 1;
23733 else
23735 rtx offset = force_reg (Pmode, const0_rtx);
23736 rtx tmp;
23738 if (max_size > 4)
23740 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23741 src = change_address (srcmem, SImode, srcptr);
23742 dest = change_address (destmem, SImode, destptr);
23743 emit_move_insn (dest, src);
23744 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23745 true, OPTAB_LIB_WIDEN);
23746 if (tmp != offset)
23747 emit_move_insn (offset, tmp);
23748 emit_label (label);
23749 LABEL_NUSES (label) = 1;
23751 if (max_size > 2)
23753 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23754 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23755 src = change_address (srcmem, HImode, tmp);
23756 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23757 dest = change_address (destmem, HImode, tmp);
23758 emit_move_insn (dest, src);
23759 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23760 true, OPTAB_LIB_WIDEN);
23761 if (tmp != offset)
23762 emit_move_insn (offset, tmp);
23763 emit_label (label);
23764 LABEL_NUSES (label) = 1;
23766 if (max_size > 1)
23768 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23769 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23770 src = change_address (srcmem, QImode, tmp);
23771 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23772 dest = change_address (destmem, QImode, tmp);
23773 emit_move_insn (dest, src);
23774 emit_label (label);
23775 LABEL_NUSES (label) = 1;
23780 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23781 with value PROMOTED_VAL.
23783 Return value is the updated DST. */
23784 static rtx
23785 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23786 HOST_WIDE_INT size_to_move)
23788 rtx dst = destmem, adjust;
23789 enum insn_code code;
23790 machine_mode move_mode;
23791 int piece_size, i;
23793 /* Find the widest mode in which we could perform moves.
23794 Start with the biggest power of 2 not larger than SIZE_TO_MOVE and halve
23795 it until a move of such size is supported. */
23796 move_mode = GET_MODE (promoted_val);
23797 if (move_mode == VOIDmode)
23798 move_mode = QImode;
23799 if (size_to_move < GET_MODE_SIZE (move_mode))
23801 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23802 promoted_val = gen_lowpart (move_mode, promoted_val);
23804 piece_size = GET_MODE_SIZE (move_mode);
23805 code = optab_handler (mov_optab, move_mode);
23806 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23808 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23810 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23811 gcc_assert (size_to_move % piece_size == 0);
23812 adjust = GEN_INT (piece_size);
23813 for (i = 0; i < size_to_move; i += piece_size)
23815 if (piece_size <= GET_MODE_SIZE (word_mode))
23817 emit_insn (gen_strset (destptr, dst, promoted_val));
23818 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23819 piece_size);
23820 continue;
23823 emit_insn (GEN_FCN (code) (dst, promoted_val));
23825 emit_move_insn (destptr,
23826 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23828 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23829 piece_size);
23832 /* Update DST rtx. */
23833 return dst;
23835 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23836 static void
23837 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23838 rtx count, int max_size)
23840 count =
23841 expand_simple_binop (counter_mode (count), AND, count,
23842 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23843 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23844 gen_lowpart (QImode, value), count, QImode,
23845 1, max_size / 2, true);
23848 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23849 static void
23850 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23851 rtx count, int max_size)
23853 rtx dest;
23855 if (CONST_INT_P (count))
23857 HOST_WIDE_INT countval = INTVAL (count);
23858 HOST_WIDE_INT epilogue_size = countval % max_size;
23859 int i;
23861 /* For now MAX_SIZE should be a power of 2. This assert could be
23862 relaxed, but it'll require a bit more complicated epilogue
23863 expanding. */
23864 gcc_assert ((max_size & (max_size - 1)) == 0);
23865 for (i = max_size; i >= 1; i >>= 1)
23867 if (epilogue_size & i)
23869 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23870 destmem = emit_memset (destmem, destptr, vec_value, i);
23871 else
23872 destmem = emit_memset (destmem, destptr, value, i);
23875 return;
23877 if (max_size > 32)
23879 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23880 return;
23882 if (max_size > 16)
23884 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23885 if (TARGET_64BIT)
23887 dest = change_address (destmem, DImode, destptr);
23888 emit_insn (gen_strset (destptr, dest, value));
23889 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23890 emit_insn (gen_strset (destptr, dest, value));
23892 else
23894 dest = change_address (destmem, SImode, destptr);
23895 emit_insn (gen_strset (destptr, dest, value));
23896 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23897 emit_insn (gen_strset (destptr, dest, value));
23898 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23899 emit_insn (gen_strset (destptr, dest, value));
23900 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23901 emit_insn (gen_strset (destptr, dest, value));
23903 emit_label (label);
23904 LABEL_NUSES (label) = 1;
23906 if (max_size > 8)
23908 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23909 if (TARGET_64BIT)
23911 dest = change_address (destmem, DImode, destptr);
23912 emit_insn (gen_strset (destptr, dest, value));
23914 else
23916 dest = change_address (destmem, SImode, destptr);
23917 emit_insn (gen_strset (destptr, dest, value));
23918 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23919 emit_insn (gen_strset (destptr, dest, value));
23921 emit_label (label);
23922 LABEL_NUSES (label) = 1;
23924 if (max_size > 4)
23926 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23927 dest = change_address (destmem, SImode, destptr);
23928 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23929 emit_label (label);
23930 LABEL_NUSES (label) = 1;
23932 if (max_size > 2)
23934 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23935 dest = change_address (destmem, HImode, destptr);
23936 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23937 emit_label (label);
23938 LABEL_NUSES (label) = 1;
23940 if (max_size > 1)
23942 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23943 dest = change_address (destmem, QImode, destptr);
23944 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23945 emit_label (label);
23946 LABEL_NUSES (label) = 1;
23950 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
23951 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23952 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23953 ignored.
23954 Return value is updated DESTMEM. */
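/* E.g. with ALIGN == 1 and DESIRED_ALIGNMENT == 8 the loop below tests
bits 0, 1 and 2 of DESTPTR at run time and, for each bit that is set,
copies (or stores) 1, 2 or 4 bytes respectively, decreasing COUNT and
raising the recorded alignment of DESTMEM.  */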
23955 static rtx
23956 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23957 rtx destptr, rtx srcptr, rtx value,
23958 rtx vec_value, rtx count, int align,
23959 int desired_alignment, bool issetmem)
23961 int i;
23962 for (i = 1; i < desired_alignment; i <<= 1)
23964 if (align <= i)
23966 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
23967 if (issetmem)
23969 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23970 destmem = emit_memset (destmem, destptr, vec_value, i);
23971 else
23972 destmem = emit_memset (destmem, destptr, value, i);
23974 else
23975 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23976 ix86_adjust_counter (count, i);
23977 emit_label (label);
23978 LABEL_NUSES (label) = 1;
23979 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23982 return destmem;
23985 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
23986 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23987 and jump to DONE_LABEL. */
23988 static void
23989 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23990 rtx destptr, rtx srcptr,
23991 rtx value, rtx vec_value,
23992 rtx count, int size,
23993 rtx done_label, bool issetmem)
23995 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
23996 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23997 rtx modesize;
23998 int n;
24000 /* If we do not have vector value to copy, we must reduce size. */
24001 if (issetmem)
24003 if (!vec_value)
24005 if (GET_MODE (value) == VOIDmode && size > 8)
24006 mode = Pmode;
24007 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
24008 mode = GET_MODE (value);
24010 else
24011 mode = GET_MODE (vec_value), value = vec_value;
24013 else
24015 /* Choose appropriate vector mode. */
24016 if (size >= 32)
24017 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24018 else if (size >= 16)
24019 mode = TARGET_SSE ? V16QImode : DImode;
24020 srcmem = change_address (srcmem, mode, srcptr);
24022 destmem = change_address (destmem, mode, destptr);
24023 modesize = GEN_INT (GET_MODE_SIZE (mode));
24024 gcc_assert (GET_MODE_SIZE (mode) <= size);
24025 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24027 if (issetmem)
24028 emit_move_insn (destmem, gen_lowpart (mode, value));
24029 else
24031 emit_move_insn (destmem, srcmem);
24032 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24034 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24037 destmem = offset_address (destmem, count, 1);
24038 destmem = offset_address (destmem, GEN_INT (-2 * size),
24039 GET_MODE_SIZE (mode));
24040 if (!issetmem)
24042 srcmem = offset_address (srcmem, count, 1);
24043 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24044 GET_MODE_SIZE (mode));
24046 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24048 if (issetmem)
24049 emit_move_insn (destmem, gen_lowpart (mode, value));
24050 else
24052 emit_move_insn (destmem, srcmem);
24053 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24055 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24057 emit_jump_insn (gen_jump (done_label));
24058 emit_barrier ();
24060 emit_label (label);
24061 LABEL_NUSES (label) = 1;
24064 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
24065 and get ready for the main memcpy loop by copying initial DESIRED_ALIGN-ALIGN
24066 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way we can
24067 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
24068 DONE_LABEL is a label after the whole copying sequence. The label is created
24069 on demand if *DONE_LABEL is NULL.
24070 MIN_SIZE is the minimal size of the block copied. This value gets adjusted
24071 for new bounds after the initial copies.
24073 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
24074 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
24075 we will dispatch to a library call for large blocks.
24077 In pseudocode we do:
24079 if (COUNT < SIZE)
24081 Assume that SIZE is 4. Bigger sizes are handled analogously
24082 if (COUNT & 4)
24084 copy 4 bytes from SRCPTR to DESTPTR
24085 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24086 goto done_label
24088 if (!COUNT)
24089 goto done_label;
24090 copy 1 byte from SRCPTR to DESTPTR
24091 if (COUNT & 2)
24093 copy 2 bytes from SRCPTR to DESTPTR
24094 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24097 else
24099 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24100 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT - SIZE
24102 OLD_DESTPTR = DESTPTR;
24103 Align DESTPTR up to DESIRED_ALIGN
24104 SRCPTR += DESTPTR - OLD_DESTPTR
24105 COUNT -= DESTPTR - OLD_DESTPTR
24106 if (DYNAMIC_CHECK)
24107 Round COUNT down to multiple of SIZE
24108 << optional caller supplied zero size guard is here >>
24109 << optional caller supplied dynamic check is here >>
24110 << caller supplied main copy loop is here >>
24112 done_label:
24114 static void
24115 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24116 rtx *destptr, rtx *srcptr,
24117 machine_mode mode,
24118 rtx value, rtx vec_value,
24119 rtx *count,
24120 rtx_code_label **done_label,
24121 int size,
24122 int desired_align,
24123 int align,
24124 unsigned HOST_WIDE_INT *min_size,
24125 bool dynamic_check,
24126 bool issetmem)
24128 rtx_code_label *loop_label = NULL, *label;
24129 int n;
24130 rtx modesize;
24131 int prolog_size = 0;
24132 rtx mode_value;
24134 /* Choose the proper value to copy. */
24135 if (issetmem && VECTOR_MODE_P (mode))
24136 mode_value = vec_value;
24137 else
24138 mode_value = value;
24139 gcc_assert (GET_MODE_SIZE (mode) <= size);
24141 /* See if block is big or small, handle small blocks. */
24142 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24144 int size2 = size;
24145 loop_label = gen_label_rtx ();
24147 if (!*done_label)
24148 *done_label = gen_label_rtx ();
24150 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24151 1, loop_label);
24152 size2 >>= 1;
24154 /* Handle sizes > 3. */
24155 for (;size2 > 2; size2 >>= 1)
24156 expand_small_movmem_or_setmem (destmem, srcmem,
24157 *destptr, *srcptr,
24158 value, vec_value,
24159 *count,
24160 size2, *done_label, issetmem);
24161 /* Nothing to copy? Jump to DONE_LABEL if so */
24162 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24163 1, *done_label);
24165 /* Do a byte copy. */
24166 destmem = change_address (destmem, QImode, *destptr);
24167 if (issetmem)
24168 emit_move_insn (destmem, gen_lowpart (QImode, value));
24169 else
24171 srcmem = change_address (srcmem, QImode, *srcptr);
24172 emit_move_insn (destmem, srcmem);
24175 /* Handle sizes 2 and 3. */
24176 label = ix86_expand_aligntest (*count, 2, false);
24177 destmem = change_address (destmem, HImode, *destptr);
24178 destmem = offset_address (destmem, *count, 1);
24179 destmem = offset_address (destmem, GEN_INT (-2), 2);
24180 if (issetmem)
24181 emit_move_insn (destmem, gen_lowpart (HImode, value));
24182 else
24184 srcmem = change_address (srcmem, HImode, *srcptr);
24185 srcmem = offset_address (srcmem, *count, 1);
24186 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24187 emit_move_insn (destmem, srcmem);
24190 emit_label (label);
24191 LABEL_NUSES (label) = 1;
24192 emit_jump_insn (gen_jump (*done_label));
24193 emit_barrier ();
24195 else
24196 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24197 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24199 /* Start memcpy for COUNT >= SIZE. */
24200 if (loop_label)
24202 emit_label (loop_label);
24203 LABEL_NUSES (loop_label) = 1;
24206 /* Copy first desired_align bytes. */
24207 if (!issetmem)
24208 srcmem = change_address (srcmem, mode, *srcptr);
24209 destmem = change_address (destmem, mode, *destptr);
24210 modesize = GEN_INT (GET_MODE_SIZE (mode));
24211 for (n = 0; prolog_size < desired_align - align; n++)
24213 if (issetmem)
24214 emit_move_insn (destmem, mode_value);
24215 else
24217 emit_move_insn (destmem, srcmem);
24218 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24220 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24221 prolog_size += GET_MODE_SIZE (mode);
24225 /* Copy last SIZE bytes. */
24226 destmem = offset_address (destmem, *count, 1);
24227 destmem = offset_address (destmem,
24228 GEN_INT (-size - prolog_size),
24230 if (issetmem)
24231 emit_move_insn (destmem, mode_value);
24232 else
24234 srcmem = offset_address (srcmem, *count, 1);
24235 srcmem = offset_address (srcmem,
24236 GEN_INT (-size - prolog_size),
24238 emit_move_insn (destmem, srcmem);
24240 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24242 destmem = offset_address (destmem, modesize, 1);
24243 if (issetmem)
24244 emit_move_insn (destmem, mode_value);
24245 else
24247 srcmem = offset_address (srcmem, modesize, 1);
24248 emit_move_insn (destmem, srcmem);
24252 /* Align destination. */
24253 if (desired_align > 1 && desired_align > align)
24255 rtx saveddest = *destptr;
24257 gcc_assert (desired_align <= size);
24258 /* Align destptr up, place it to new register. */
24259 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24260 GEN_INT (prolog_size),
24261 NULL_RTX, 1, OPTAB_DIRECT);
24262 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
24263 REG_POINTER (*destptr) = 1;
24264 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24265 GEN_INT (-desired_align),
24266 *destptr, 1, OPTAB_DIRECT);
24267 /* See how many bytes we skipped. */
24268 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24269 *destptr,
24270 saveddest, 1, OPTAB_DIRECT);
24271 /* Adjust srcptr and count. */
24272 if (!issetmem)
24273 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
24274 saveddest, *srcptr, 1, OPTAB_DIRECT);
24275 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24276 saveddest, *count, 1, OPTAB_DIRECT);
24277 /* We copied at most size + prolog_size. */
24278 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24279 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24280 else
24281 *min_size = 0;
24283 /* Our loops always round down the block size, but for dispatch to a library
24284 call we need the precise value. */
24285 if (dynamic_check)
24286 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24287 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24289 else
24291 gcc_assert (prolog_size == 0);
24292 /* Decrease count, so we won't end up copying last word twice. */
24293 if (!CONST_INT_P (*count))
24294 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24295 constm1_rtx, *count, 1, OPTAB_DIRECT);
24296 else
24297 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24298 if (*min_size)
24299 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24304 /* This function is like the previous one, except here we know how many bytes
24305 need to be copied. That allows us to update alignment not only of DST, which
24306 is returned, but also of SRC, which is passed as a pointer for that
24307 reason. */
24308 static rtx
24309 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24310 rtx srcreg, rtx value, rtx vec_value,
24311 int desired_align, int align_bytes,
24312 bool issetmem)
24314 rtx src = NULL;
24315 rtx orig_dst = dst;
24316 rtx orig_src = NULL;
24317 int piece_size = 1;
24318 int copied_bytes = 0;
24320 if (!issetmem)
24322 gcc_assert (srcp != NULL);
24323 src = *srcp;
24324 orig_src = src;
24327 for (piece_size = 1;
24328 piece_size <= desired_align && copied_bytes < align_bytes;
24329 piece_size <<= 1)
24331 if (align_bytes & piece_size)
24333 if (issetmem)
24335 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24336 dst = emit_memset (dst, destreg, vec_value, piece_size);
24337 else
24338 dst = emit_memset (dst, destreg, value, piece_size);
24340 else
24341 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24342 copied_bytes += piece_size;
24345 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24346 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24347 if (MEM_SIZE_KNOWN_P (orig_dst))
24348 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24350 if (!issetmem)
24352 int src_align_bytes = get_mem_align_offset (src, desired_align
24353 * BITS_PER_UNIT);
24354 if (src_align_bytes >= 0)
24355 src_align_bytes = desired_align - src_align_bytes;
24356 if (src_align_bytes >= 0)
24358 unsigned int src_align;
24359 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24361 if ((src_align_bytes & (src_align - 1))
24362 == (align_bytes & (src_align - 1)))
24363 break;
24365 if (src_align > (unsigned int) desired_align)
24366 src_align = desired_align;
24367 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24368 set_mem_align (src, src_align * BITS_PER_UNIT);
24370 if (MEM_SIZE_KNOWN_P (orig_src))
24371 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24372 *srcp = src;
24375 return dst;
24378 /* Return true if ALG can be used in current context.
24379 Assume we expand memset if MEMSET is true. */
24380 static bool
24381 alg_usable_p (enum stringop_alg alg, bool memset)
24383 if (alg == no_stringop)
24384 return false;
24385 if (alg == vector_loop)
24386 return TARGET_SSE || TARGET_AVX;
24387 /* Algorithms using the rep prefix want at least edi and ecx;
24388 additionally, memset wants eax and memcpy wants esi. Don't
24389 consider such algorithms if the user has appropriated those
24390 registers for their own purposes. */
24391 if (alg == rep_prefix_1_byte
24392 || alg == rep_prefix_4_byte
24393 || alg == rep_prefix_8_byte)
24394 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24395 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24396 return true;
24399 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
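/* E.g. when not optimizing for speed (and unless the user forced an
algorithm), a known count that is a multiple of 4 (and, for memset, a zero
fill value) picks rep_prefix_4_byte when it is usable, since
"rep movsl"/"rep stosl" is the shortest sequence; other counts fall back
to rep_prefix_1_byte or a byte loop.  */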
24400 static enum stringop_alg
24401 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24402 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24403 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24405 const struct stringop_algs * algs;
24406 bool optimize_for_speed;
24407 int max = 0;
24408 const struct processor_costs *cost;
24409 int i;
24410 bool any_alg_usable_p = false;
24412 *noalign = false;
24413 *dynamic_check = -1;
24415 /* Even if the string operation call is cold, we still might spend a lot
24416 of time processing large blocks. */
24417 if (optimize_function_for_size_p (cfun)
24418 || (optimize_insn_for_size_p ()
24419 && (max_size < 256
24420 || (expected_size != -1 && expected_size < 256))))
24421 optimize_for_speed = false;
24422 else
24423 optimize_for_speed = true;
24425 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24426 if (memset)
24427 algs = &cost->memset[TARGET_64BIT != 0];
24428 else
24429 algs = &cost->memcpy[TARGET_64BIT != 0];
24431 /* See maximal size for user defined algorithm. */
24432 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24434 enum stringop_alg candidate = algs->size[i].alg;
24435 bool usable = alg_usable_p (candidate, memset);
24436 any_alg_usable_p |= usable;
24438 if (candidate != libcall && candidate && usable)
24439 max = algs->size[i].max;
24442 /* If expected size is not known but max size is small enough
24443 so that the inline version is a win, set the expected size into
24444 the range. */
24445 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24446 && expected_size == -1)
24447 expected_size = min_size / 2 + max_size / 2;
24449 /* If the user specified the algorithm, honor it if possible. */
24450 if (ix86_stringop_alg != no_stringop
24451 && alg_usable_p (ix86_stringop_alg, memset))
24452 return ix86_stringop_alg;
24453 /* rep; movq or rep; movl is the smallest variant. */
24454 else if (!optimize_for_speed)
24456 *noalign = true;
24457 if (!count || (count & 3) || (memset && !zero_memset))
24458 return alg_usable_p (rep_prefix_1_byte, memset)
24459 ? rep_prefix_1_byte : loop_1_byte;
24460 else
24461 return alg_usable_p (rep_prefix_4_byte, memset)
24462 ? rep_prefix_4_byte : loop;
24464 /* Very tiny blocks are best handled via the loop; REP is expensive to
24465 set up. */
24466 else if (expected_size != -1 && expected_size < 4)
24467 return loop_1_byte;
24468 else if (expected_size != -1)
24470 enum stringop_alg alg = libcall;
24471 bool alg_noalign = false;
24472 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24474 /* We get here if the algorithms that were not libcall-based
24475 were rep-prefix based and we are unable to use rep prefixes
24476 based on global register usage. Break out of the loop and
24477 use the heuristic below. */
24478 if (algs->size[i].max == 0)
24479 break;
24480 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24482 enum stringop_alg candidate = algs->size[i].alg;
24484 if (candidate != libcall && alg_usable_p (candidate, memset))
24486 alg = candidate;
24487 alg_noalign = algs->size[i].noalign;
24489 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24490 last non-libcall inline algorithm. */
24491 if (TARGET_INLINE_ALL_STRINGOPS)
24493 /* When the current size is best to be copied by a libcall,
24494 but we are still forced to inline, run the heuristic below
24495 that will pick code for medium sized blocks. */
24496 if (alg != libcall)
24498 *noalign = alg_noalign;
24499 return alg;
24501 else if (!any_alg_usable_p)
24502 break;
24504 else if (alg_usable_p (candidate, memset))
24506 *noalign = algs->size[i].noalign;
24507 return candidate;
24512 /* When asked to inline the call anyway, try to pick a meaningful choice.
24513 We look for the maximal size of block that is faster to copy by hand and
24514 take blocks of at most that size, guessing that the average size will
24515 be roughly half of the block.
24517 If this turns out to be bad, we might simply specify the preferred
24518 choice in ix86_costs. */
24519 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24520 && (algs->unknown_size == libcall
24521 || !alg_usable_p (algs->unknown_size, memset)))
24523 enum stringop_alg alg;
24525 /* If there aren't any usable algorithms, then recursing on
24526 smaller sizes isn't going to find anything. Just return the
24527 simple byte-at-a-time copy loop. */
24528 if (!any_alg_usable_p)
24530 /* Pick something reasonable. */
24531 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24532 *dynamic_check = 128;
24533 return loop_1_byte;
24535 if (max <= 0)
24536 max = 4096;
24537 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24538 zero_memset, dynamic_check, noalign);
24539 gcc_assert (*dynamic_check == -1);
24540 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24541 *dynamic_check = max;
24542 else
24543 gcc_assert (alg != libcall);
24544 return alg;
24546 return (alg_usable_p (algs->unknown_size, memset)
24547 ? algs->unknown_size : libcall);
24550 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24551 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24552 static int
24553 decide_alignment (int align,
24554 enum stringop_alg alg,
24555 int expected_size,
24556 machine_mode move_mode)
24558 int desired_align = 0;
24560 gcc_assert (alg != no_stringop);
24562 if (alg == libcall)
24563 return 0;
24564 if (move_mode == VOIDmode)
24565 return 0;
24567 desired_align = GET_MODE_SIZE (move_mode);
24568 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
24569 copying a whole cacheline at once. */
24570 if (TARGET_PENTIUMPRO
24571 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24572 desired_align = 8;
24574 if (optimize_size)
24575 desired_align = 1;
24576 if (desired_align < align)
24577 desired_align = align;
24578 if (expected_size != -1 && expected_size < 4)
24579 desired_align = align;
24581 return desired_align;
24585 /* Helper function for memcpy. For a QImode value 0xXY, produce
24586 0xXYXYXYXY of the width specified by MODE. This is essentially
24587 a * 0x10101010, but we can do slightly better than
24588 synth_mult by unwinding the sequence by hand on CPUs with
24589 a slow multiply. */
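/* Worked example: promoting the QImode value 0x5A gives 0x5A5A5A5A in SImode
   and 0x5A5A5A5A5A5A5A5A in DImode; the constant-int path below computes
   v |= v << 8; v |= v << 16; (and effectively v |= v << 32 for DImode).  */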
24590 static rtx
24591 promote_duplicated_reg (machine_mode mode, rtx val)
24593 machine_mode valmode = GET_MODE (val);
24594 rtx tmp;
24595 int nops = mode == DImode ? 3 : 2;
24597 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24598 if (val == const0_rtx)
24599 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24600 if (CONST_INT_P (val))
24602 HOST_WIDE_INT v = INTVAL (val) & 255;
24604 v |= v << 8;
24605 v |= v << 16;
24606 if (mode == DImode)
24607 v |= (v << 16) << 16;
24608 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24611 if (valmode == VOIDmode)
24612 valmode = QImode;
24613 if (valmode != QImode)
24614 val = gen_lowpart (QImode, val);
24615 if (mode == QImode)
24616 return val;
24617 if (!TARGET_PARTIAL_REG_STALL)
24618 nops--;
24619 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24620 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24621 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24622 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24624 rtx reg = convert_modes (mode, QImode, val, true);
24625 tmp = promote_duplicated_reg (mode, const1_rtx);
24626 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24627 OPTAB_DIRECT);
24629 else
24631 rtx reg = convert_modes (mode, QImode, val, true);
24633 if (!TARGET_PARTIAL_REG_STALL)
24634 if (mode == SImode)
24635 emit_insn (gen_movsi_insv_1 (reg, reg));
24636 else
24637 emit_insn (gen_movdi_insv_1 (reg, reg));
24638 else
24640 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24641 NULL, 1, OPTAB_DIRECT);
24642 reg =
24643 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24645 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24646 NULL, 1, OPTAB_DIRECT);
24647 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24648 if (mode == SImode)
24649 return reg;
24650 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24651 NULL, 1, OPTAB_DIRECT);
24652 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24653 return reg;
24657 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
24658 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
24659 getting the alignment from ALIGN to DESIRED_ALIGN. */
24660 static rtx
24661 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24662 int align)
24664 rtx promoted_val;
24666 if (TARGET_64BIT
24667 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24668 promoted_val = promote_duplicated_reg (DImode, val);
24669 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24670 promoted_val = promote_duplicated_reg (SImode, val);
24671 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24672 promoted_val = promote_duplicated_reg (HImode, val);
24673 else
24674 promoted_val = val;
24676 return promoted_val;
24679 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24680 operations when profitable. The code depends upon architecture, block size
24681 and alignment, but always has one of the following overall structures:
24683 Aligned move sequence:
24685 1) Prologue guard: Conditional that jumps up to epilogues for small
24686 blocks that can be handled by the epilogue alone. This is faster
24687 but also needed for correctness, since the prologue assumes the block
24688 is larger than the desired alignment.
24690 Optional dynamic check for size and libcall for large
24691 blocks is emitted here too, with -minline-stringops-dynamically.
24693 2) Prologue: copy first few bytes in order to get destination
24694 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24695 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24696 copied. We emit either a jump tree on power of two sized
24697 blocks, or a byte loop.
24699 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24700 with specified algorithm.
24702 4) Epilogue: code copying tail of the block that is too small to be
24703 handled by main body (or up to size guarded by prologue guard).
24705 Misaligned move sequence
24707 1) misaligned move prologue/epilogue containing:
24708 a) Prologue handling small memory blocks and jumping to done_label
24709 (skipped if blocks are known to be large enough)
24710 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24711 needed by single possibly misaligned move
24712 (skipped if alignment is not needed)
24713 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24715 2) Zero size guard dispatching to done_label, if needed
24717 3) Dispatch to a library call, if needed.
24719 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24720 with specified algorithm. */
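/* As a sketch with hypothetical numbers: copying 100 bytes with ALIGN == 1,
   DESIRED_ALIGN == 8 and SIZE_NEEDED == 8 emits a guard that branches to the
   epilogue for tiny blocks, a prologue copying up to 7 bytes to align the
   destination, a main loop moving 8-byte chunks, and an epilogue copying the
   remaining tail bytes.  */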
24721 bool
24722 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24723 rtx align_exp, rtx expected_align_exp,
24724 rtx expected_size_exp, rtx min_size_exp,
24725 rtx max_size_exp, rtx probable_max_size_exp,
24726 bool issetmem)
24728 rtx destreg;
24729 rtx srcreg = NULL;
24730 rtx_code_label *label = NULL;
24731 rtx tmp;
24732 rtx_code_label *jump_around_label = NULL;
24733 HOST_WIDE_INT align = 1;
24734 unsigned HOST_WIDE_INT count = 0;
24735 HOST_WIDE_INT expected_size = -1;
24736 int size_needed = 0, epilogue_size_needed;
24737 int desired_align = 0, align_bytes = 0;
24738 enum stringop_alg alg;
24739 rtx promoted_val = NULL;
24740 rtx vec_promoted_val = NULL;
24741 bool force_loopy_epilogue = false;
24742 int dynamic_check;
24743 bool need_zero_guard = false;
24744 bool noalign;
24745 machine_mode move_mode = VOIDmode;
24746 int unroll_factor = 1;
24747 /* TODO: Once value ranges are available, fill in proper data. */
24748 unsigned HOST_WIDE_INT min_size = 0;
24749 unsigned HOST_WIDE_INT max_size = -1;
24750 unsigned HOST_WIDE_INT probable_max_size = -1;
24751 bool misaligned_prologue_used = false;
24753 if (CONST_INT_P (align_exp))
24754 align = INTVAL (align_exp);
24755 /* i386 can do misaligned access at a reasonably increased cost. */
24756 if (CONST_INT_P (expected_align_exp)
24757 && INTVAL (expected_align_exp) > align)
24758 align = INTVAL (expected_align_exp);
24759 /* ALIGN is the minimum of destination and source alignment, but we care here
24760 just about destination alignment. */
24761 else if (!issetmem
24762 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24763 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24765 if (CONST_INT_P (count_exp))
24767 min_size = max_size = probable_max_size = count = expected_size
24768 = INTVAL (count_exp);
24769 /* When COUNT is 0, there is nothing to do. */
24770 if (!count)
24771 return true;
24773 else
24775 if (min_size_exp)
24776 min_size = INTVAL (min_size_exp);
24777 if (max_size_exp)
24778 max_size = INTVAL (max_size_exp);
24779 if (probable_max_size_exp)
24780 probable_max_size = INTVAL (probable_max_size_exp);
24781 if (CONST_INT_P (expected_size_exp))
24782 expected_size = INTVAL (expected_size_exp);
24785 /* Make sure we don't need to care about overflow later on. */
24786 if (count > (HOST_WIDE_INT_1U << 30))
24787 return false;
24789 /* Step 0: Decide on preferred algorithm, desired alignment and
24790 size of chunks to be copied by main loop. */
24791 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24792 issetmem,
24793 issetmem && val_exp == const0_rtx,
24794 &dynamic_check, &noalign);
24795 if (alg == libcall)
24796 return false;
24797 gcc_assert (alg != no_stringop);
24799 /* For now the vector version of memset is generated only for memory zeroing, as
24800 creating the promoted vector value is very cheap in this case. */
24801 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24802 alg = unrolled_loop;
24804 if (!count)
24805 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24806 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24807 if (!issetmem)
24808 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24810 unroll_factor = 1;
24811 move_mode = word_mode;
24812 switch (alg)
24814 case libcall:
24815 case no_stringop:
24816 case last_alg:
24817 gcc_unreachable ();
24818 case loop_1_byte:
24819 need_zero_guard = true;
24820 move_mode = QImode;
24821 break;
24822 case loop:
24823 need_zero_guard = true;
24824 break;
24825 case unrolled_loop:
24826 need_zero_guard = true;
24827 unroll_factor = (TARGET_64BIT ? 4 : 2);
24828 break;
24829 case vector_loop:
24830 need_zero_guard = true;
24831 unroll_factor = 4;
24832 /* Find the widest supported mode. */
24833 move_mode = word_mode;
24834 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24835 != CODE_FOR_nothing)
24836 move_mode = GET_MODE_WIDER_MODE (move_mode);
24838 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24839 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24840 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24842 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24843 move_mode = mode_for_vector (word_mode, nunits);
24844 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24845 move_mode = word_mode;
24847 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24848 break;
24849 case rep_prefix_8_byte:
24850 move_mode = DImode;
24851 break;
24852 case rep_prefix_4_byte:
24853 move_mode = SImode;
24854 break;
24855 case rep_prefix_1_byte:
24856 move_mode = QImode;
24857 break;
24859 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24860 epilogue_size_needed = size_needed;
24862 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24863 if (!TARGET_ALIGN_STRINGOPS || noalign)
24864 align = desired_align;
24866 /* Step 1: Prologue guard. */
24868 /* Alignment code needs count to be in register. */
24869 if (CONST_INT_P (count_exp) && desired_align > align)
24871 if (INTVAL (count_exp) > desired_align
24872 && INTVAL (count_exp) > size_needed)
24874 align_bytes
24875 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24876 if (align_bytes <= 0)
24877 align_bytes = 0;
24878 else
24879 align_bytes = desired_align - align_bytes;
24881 if (align_bytes == 0)
24882 count_exp = force_reg (counter_mode (count_exp), count_exp);
24884 gcc_assert (desired_align >= 1 && align >= 1);
24886 /* Misaligned move sequences handle both prologue and epilogue at once.
24887 Default code generation results in smaller code for large alignments
24888 and also avoids redundant work when sizes are known precisely. */
24889 misaligned_prologue_used
24890 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24891 && MAX (desired_align, epilogue_size_needed) <= 32
24892 && desired_align <= epilogue_size_needed
24893 && ((desired_align > align && !align_bytes)
24894 || (!count && epilogue_size_needed > 1)));
24896 /* Do the cheap promotion to allow better CSE across the
24897 main loop and epilogue (i.e. one load of the big constant in
24898 front of all the code).
24899 For now the misaligned move sequences do not have a fast path
24900 without broadcasting. */
24901 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24903 if (alg == vector_loop)
24905 gcc_assert (val_exp == const0_rtx);
24906 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24907 promoted_val = promote_duplicated_reg_to_size (val_exp,
24908 GET_MODE_SIZE (word_mode),
24909 desired_align, align);
24911 else
24913 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24914 desired_align, align);
24917 /* Misaligned move sequences handle both prologues and epilogues at once.
24918 Default code generation results in smaller code for large alignments and
24919 also avoids redundant work when sizes are known precisely. */
24920 if (misaligned_prologue_used)
24922 /* The misaligned move prologue handles small blocks by itself. */
24923 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24924 (dst, src, &destreg, &srcreg,
24925 move_mode, promoted_val, vec_promoted_val,
24926 &count_exp,
24927 &jump_around_label,
24928 desired_align < align
24929 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24930 desired_align, align, &min_size, dynamic_check, issetmem);
24931 if (!issetmem)
24932 src = change_address (src, BLKmode, srcreg);
24933 dst = change_address (dst, BLKmode, destreg);
24934 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24935 epilogue_size_needed = 0;
24936 if (need_zero_guard && !min_size)
24938 /* It is possible that we copied enough so the main loop will not
24939 execute. */
24940 gcc_assert (size_needed > 1);
24941 if (jump_around_label == NULL_RTX)
24942 jump_around_label = gen_label_rtx ();
24943 emit_cmp_and_jump_insns (count_exp,
24944 GEN_INT (size_needed),
24945 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24946 if (expected_size == -1
24947 || expected_size < (desired_align - align) / 2 + size_needed)
24948 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24949 else
24950 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24953 /* Ensure that alignment prologue won't copy past end of block. */
24954 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24956 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24957 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24958 Make sure it is a power of 2. */
24959 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
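/* E.g. with SIZE_NEEDED == 8 and no extra alignment work the value starts as
   7 and is rounded up to the power of two 8, so the epilogue handles the
   remaining 0-7 tail bytes.  */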
24961 /* To improve performance of small blocks, we jump around the VAL
24962 promoting code. This means that if the promoted VAL is not constant,
24963 we might not use it in the epilogue and have to use the byte
24964 loop variant. */
24965 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24966 force_loopy_epilogue = true;
24967 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24968 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24970 /* If main algorithm works on QImode, no epilogue is needed.
24971 For small sizes just don't align anything. */
24972 if (size_needed == 1)
24973 desired_align = align;
24974 else
24975 goto epilogue;
24977 else if (!count
24978 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24980 label = gen_label_rtx ();
24981 emit_cmp_and_jump_insns (count_exp,
24982 GEN_INT (epilogue_size_needed),
24983 LTU, 0, counter_mode (count_exp), 1, label);
24984 if (expected_size == -1 || expected_size < epilogue_size_needed)
24985 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24986 else
24987 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24991 /* Emit code to decide at runtime whether a library call or inline
24992 code should be used. */
24993 if (dynamic_check != -1)
24995 if (!issetmem && CONST_INT_P (count_exp))
24997 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24999 emit_block_move_via_libcall (dst, src, count_exp, false);
25000 count_exp = const0_rtx;
25001 goto epilogue;
25004 else
25006 rtx_code_label *hot_label = gen_label_rtx ();
25007 if (jump_around_label == NULL_RTX)
25008 jump_around_label = gen_label_rtx ();
25009 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
25010 LEU, 0, counter_mode (count_exp),
25011 1, hot_label);
25012 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25013 if (issetmem)
25014 set_storage_via_libcall (dst, count_exp, val_exp, false);
25015 else
25016 emit_block_move_via_libcall (dst, src, count_exp, false);
25017 emit_jump (jump_around_label);
25018 emit_label (hot_label);
25022 /* Step 2: Alignment prologue. */
25023 /* Do the expensive promotion once we branched off the small blocks. */
25024 if (issetmem && !promoted_val)
25025 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25026 desired_align, align);
25028 if (desired_align > align && !misaligned_prologue_used)
25030 if (align_bytes == 0)
25032 /* Except for the first move in the prologue, we no longer know
25033 the constant offset in aliasing info. It does not seem worth
25034 the pain to maintain it for the first move, so throw away
25035 the info early. */
25036 dst = change_address (dst, BLKmode, destreg);
25037 if (!issetmem)
25038 src = change_address (src, BLKmode, srcreg);
25039 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25040 promoted_val, vec_promoted_val,
25041 count_exp, align, desired_align,
25042 issetmem);
25043 /* At most desired_align - align bytes are copied. */
25044 if (min_size < (unsigned)(desired_align - align))
25045 min_size = 0;
25046 else
25047 min_size -= desired_align - align;
25049 else
25051 /* If we know how many bytes need to be stored before dst is
25052 sufficiently aligned, maintain aliasing info accurately. */
25053 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25054 srcreg,
25055 promoted_val,
25056 vec_promoted_val,
25057 desired_align,
25058 align_bytes,
25059 issetmem);
25061 count_exp = plus_constant (counter_mode (count_exp),
25062 count_exp, -align_bytes);
25063 count -= align_bytes;
25064 min_size -= align_bytes;
25065 max_size -= align_bytes;
25067 if (need_zero_guard
25068 && !min_size
25069 && (count < (unsigned HOST_WIDE_INT) size_needed
25070 || (align_bytes == 0
25071 && count < ((unsigned HOST_WIDE_INT) size_needed
25072 + desired_align - align))))
25074 /* It is possible that we copied enough so the main loop will not
25075 execute. */
25076 gcc_assert (size_needed > 1);
25077 if (label == NULL_RTX)
25078 label = gen_label_rtx ();
25079 emit_cmp_and_jump_insns (count_exp,
25080 GEN_INT (size_needed),
25081 LTU, 0, counter_mode (count_exp), 1, label);
25082 if (expected_size == -1
25083 || expected_size < (desired_align - align) / 2 + size_needed)
25084 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25085 else
25086 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25089 if (label && size_needed == 1)
25091 emit_label (label);
25092 LABEL_NUSES (label) = 1;
25093 label = NULL;
25094 epilogue_size_needed = 1;
25095 if (issetmem)
25096 promoted_val = val_exp;
25098 else if (label == NULL_RTX && !misaligned_prologue_used)
25099 epilogue_size_needed = size_needed;
25101 /* Step 3: Main loop. */
25103 switch (alg)
25105 case libcall:
25106 case no_stringop:
25107 case last_alg:
25108 gcc_unreachable ();
25109 case loop_1_byte:
25110 case loop:
25111 case unrolled_loop:
25112 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25113 count_exp, move_mode, unroll_factor,
25114 expected_size, issetmem);
25115 break;
25116 case vector_loop:
25117 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25118 vec_promoted_val, count_exp, move_mode,
25119 unroll_factor, expected_size, issetmem);
25120 break;
25121 case rep_prefix_8_byte:
25122 case rep_prefix_4_byte:
25123 case rep_prefix_1_byte:
25124 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25125 val_exp, count_exp, move_mode, issetmem);
25126 break;
25128 /* Properly adjust the offset of src and dest memory for aliasing. */
25129 if (CONST_INT_P (count_exp))
25131 if (!issetmem)
25132 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25133 (count / size_needed) * size_needed);
25134 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25135 (count / size_needed) * size_needed);
25137 else
25139 if (!issetmem)
25140 src = change_address (src, BLKmode, srcreg);
25141 dst = change_address (dst, BLKmode, destreg);
25144 /* Step 4: Epilogue to copy the remaining bytes. */
25145 epilogue:
25146 if (label)
25148 /* When the main loop is done, COUNT_EXP might hold original count,
25149 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
25150 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
25151 bytes. Compensate if needed. */
25153 if (size_needed < epilogue_size_needed)
25155 tmp =
25156 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25157 GEN_INT (size_needed - 1), count_exp, 1,
25158 OPTAB_DIRECT);
25159 if (tmp != count_exp)
25160 emit_move_insn (count_exp, tmp);
25162 emit_label (label);
25163 LABEL_NUSES (label) = 1;
25166 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25168 if (force_loopy_epilogue)
25169 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25170 epilogue_size_needed);
25171 else
25173 if (issetmem)
25174 expand_setmem_epilogue (dst, destreg, promoted_val,
25175 vec_promoted_val, count_exp,
25176 epilogue_size_needed);
25177 else
25178 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25179 epilogue_size_needed);
25182 if (jump_around_label)
25183 emit_label (jump_around_label);
25184 return true;
25188 /* Expand the appropriate insns for doing strlen if not just doing
25189 repnz; scasb
25191 out = result, initialized with the start address
25192 align_rtx = alignment of the address.
25193 scratch = scratch register, initialized with the start address when
25194 not aligned, otherwise undefined
25196 This is just the body. It needs the initializations mentioned above and
25197 some address computing at the end. These things are done in i386.md. */
25199 static void
25200 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25202 int align;
25203 rtx tmp;
25204 rtx_code_label *align_2_label = NULL;
25205 rtx_code_label *align_3_label = NULL;
25206 rtx_code_label *align_4_label = gen_label_rtx ();
25207 rtx_code_label *end_0_label = gen_label_rtx ();
25208 rtx mem;
25209 rtx tmpreg = gen_reg_rtx (SImode);
25210 rtx scratch = gen_reg_rtx (SImode);
25211 rtx cmp;
25213 align = 0;
25214 if (CONST_INT_P (align_rtx))
25215 align = INTVAL (align_rtx);
25217 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25219 /* Is there a known alignment and is it less than 4? */
25220 if (align < 4)
25222 rtx scratch1 = gen_reg_rtx (Pmode);
25223 emit_move_insn (scratch1, out);
25224 /* Is there a known alignment and is it not 2? */
25225 if (align != 2)
25227 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25228 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25230 /* Leave just the 3 lower bits. */
25231 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25232 NULL_RTX, 0, OPTAB_WIDEN);
25234 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25235 Pmode, 1, align_4_label);
25236 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25237 Pmode, 1, align_2_label);
25238 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25239 Pmode, 1, align_3_label);
25241 else
25243 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25244 check whether it is aligned to 4 bytes. */
25246 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25247 NULL_RTX, 0, OPTAB_WIDEN);
25249 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25250 Pmode, 1, align_4_label);
25253 mem = change_address (src, QImode, out);
25255 /* Now compare the bytes. */
25257 /* Compare the first n unaligned byte on a byte per byte basis. */
25258 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25259 QImode, 1, end_0_label);
25261 /* Increment the address. */
25262 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25264 /* Not needed with an alignment of 2 */
25265 if (align != 2)
25267 emit_label (align_2_label);
25269 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25270 end_0_label);
25272 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25274 emit_label (align_3_label);
25277 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25278 end_0_label);
25280 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25283 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
25284 align this loop; it only makes the program larger and does not help
25285 speed it up. */
25286 emit_label (align_4_label);
25288 mem = change_address (src, SImode, out);
25289 emit_move_insn (scratch, mem);
25290 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25292 /* This formula yields a nonzero result iff one of the bytes is zero.
25293 This saves three branches inside the loop and many cycles. */
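/* Concretely, the sequence below computes
     (scratch - 0x01010101) & ~scratch & 0x80808080;
   e.g. scratch == 0x11223344 (no zero byte) yields 0, while
   scratch == 0x11220044 yields 0x00008000.  */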
25295 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25296 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25297 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25298 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25299 gen_int_mode (0x80808080, SImode)));
25300 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25301 align_4_label);
25303 if (TARGET_CMOVE)
25305 rtx reg = gen_reg_rtx (SImode);
25306 rtx reg2 = gen_reg_rtx (Pmode);
25307 emit_move_insn (reg, tmpreg);
25308 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25310 /* If zero is not in the first two bytes, move two bytes forward. */
25311 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25312 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25313 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25314 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
25315 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25316 reg,
25317 tmpreg)));
25318 /* Emit lea manually to avoid clobbering of flags. */
25319 emit_insn (gen_rtx_SET (SImode, reg2,
25320 gen_rtx_PLUS (Pmode, out, const2_rtx)));
25322 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25323 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25324 emit_insn (gen_rtx_SET (VOIDmode, out,
25325 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25326 reg2,
25327 out)));
25329 else
25331 rtx_code_label *end_2_label = gen_label_rtx ();
25332 /* Is zero in the first two bytes? */
25334 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25335 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25336 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25337 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25338 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25339 pc_rtx);
25340 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25341 JUMP_LABEL (tmp) = end_2_label;
25343 /* Not in the first two. Move two bytes forward. */
25344 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25345 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25347 emit_label (end_2_label);
25351 /* Avoid branch in fixing the byte. */
25352 tmpreg = gen_lowpart (QImode, tmpreg);
25353 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25354 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25355 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25356 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25358 emit_label (end_0_label);
25361 /* Expand strlen. */
25363 bool
25364 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25366 rtx addr, scratch1, scratch2, scratch3, scratch4;
25368 /* The generic case of the strlen expander is long. Avoid expanding it
25369 unless TARGET_INLINE_ALL_STRINGOPS. */
25371 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25372 && !TARGET_INLINE_ALL_STRINGOPS
25373 && !optimize_insn_for_size_p ()
25374 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25375 return false;
25377 addr = force_reg (Pmode, XEXP (src, 0));
25378 scratch1 = gen_reg_rtx (Pmode);
25380 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25381 && !optimize_insn_for_size_p ())
25383 /* Well, it seems that some optimizer does not combine a call like
25384 foo(strlen(bar), strlen(bar));
25385 when the move and the subtraction are done here. It does calculate
25386 the length just once when these instructions are done inside
25387 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
25388 often used and I use one fewer register for the lifetime of
25389 output_strlen_unroll() this is better. */
25391 emit_move_insn (out, addr);
25393 ix86_expand_strlensi_unroll_1 (out, src, align);
25395 /* strlensi_unroll_1 returns the address of the zero at the end of
25396 the string, like memchr(), so compute the length by subtracting
25397 the start address. */
25398 emit_insn (ix86_gen_sub3 (out, out, addr));
25400 else
25402 rtx unspec;
25404 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25405 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25406 return false;
25408 scratch2 = gen_reg_rtx (Pmode);
25409 scratch3 = gen_reg_rtx (Pmode);
25410 scratch4 = force_reg (Pmode, constm1_rtx);
25412 emit_move_insn (scratch3, addr);
25413 eoschar = force_reg (QImode, eoschar);
25415 src = replace_equiv_address_nv (src, scratch3);
25417 /* If .md starts supporting :P, this can be done in .md. */
25418 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25419 scratch4), UNSPEC_SCAS);
25420 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25421 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25422 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25424 return true;
25427 /* For a given symbol (function), construct code to compute the address of its
25428 PLT entry in the large x86-64 PIC model. */
25429 static rtx
25430 construct_plt_address (rtx symbol)
25432 rtx tmp, unspec;
25434 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25435 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25436 gcc_assert (Pmode == DImode);
25438 tmp = gen_reg_rtx (Pmode);
25439 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25441 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25442 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25443 return tmp;
25447 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25448 rtx callarg2,
25449 rtx pop, bool sibcall)
25451 rtx vec[3];
25452 rtx use = NULL, call;
25453 unsigned int vec_len = 0;
25455 if (pop == const0_rtx)
25456 pop = NULL;
25457 gcc_assert (!TARGET_64BIT || !pop);
25459 if (TARGET_MACHO && !TARGET_64BIT)
25461 #if TARGET_MACHO
25462 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25463 fnaddr = machopic_indirect_call_target (fnaddr);
25464 #endif
25466 else
25468 /* Static functions and indirect calls don't need the pic register. */
25469 if (flag_pic
25470 && (!TARGET_64BIT
25471 || (ix86_cmodel == CM_LARGE_PIC
25472 && DEFAULT_ABI != MS_ABI))
25473 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25474 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25476 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25477 if (ix86_use_pseudo_pic_reg ())
25478 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25479 pic_offset_table_rtx);
25483 /* Skip setting up RAX register for -mskip-rax-setup when there are no
25484 parameters passed in vector registers. */
25485 if (TARGET_64BIT
25486 && (INTVAL (callarg2) > 0
25487 || (INTVAL (callarg2) == 0
25488 && (TARGET_SSE || !flag_skip_rax_setup))))
25490 rtx al = gen_rtx_REG (QImode, AX_REG);
25491 emit_move_insn (al, callarg2);
25492 use_reg (&use, al);
25495 if (ix86_cmodel == CM_LARGE_PIC
25496 && !TARGET_PECOFF
25497 && MEM_P (fnaddr)
25498 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25499 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25500 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25501 else if (sibcall
25502 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25503 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25505 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25506 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25509 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25511 if (retval)
25513 /* We should add bounds as a destination register in case
25514 a pointer with bounds may be returned. */
25515 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25517 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25518 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25519 if (GET_CODE (retval) == PARALLEL)
25521 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
25522 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
25523 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
25524 retval = chkp_join_splitted_slot (retval, par);
25526 else
25528 retval = gen_rtx_PARALLEL (VOIDmode,
25529 gen_rtvec (3, retval, b0, b1));
25530 chkp_put_regs_to_expr_list (retval);
25534 call = gen_rtx_SET (VOIDmode, retval, call);
25536 vec[vec_len++] = call;
25538 if (pop)
25540 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25541 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25542 vec[vec_len++] = pop;
25545 if (TARGET_64BIT_MS_ABI
25546 && (!callarg2 || INTVAL (callarg2) != -2))
25548 int const cregs_size
25549 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25550 int i;
25552 for (i = 0; i < cregs_size; i++)
25554 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25555 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25557 clobber_reg (&use, gen_rtx_REG (mode, regno));
25561 if (vec_len > 1)
25562 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25563 call = emit_call_insn (call);
25564 if (use)
25565 CALL_INSN_FUNCTION_USAGE (call) = use;
25567 return call;
25570 /* Output the assembly for a call instruction. */
25572 const char *
25573 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25575 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25576 bool seh_nop_p = false;
25577 const char *xasm;
25579 if (SIBLING_CALL_P (insn))
25581 if (direct_p)
25582 xasm = "%!jmp\t%P0";
25583 /* SEH epilogue detection requires the indirect branch case
25584 to include REX.W. */
25585 else if (TARGET_SEH)
25586 xasm = "%!rex.W jmp %A0";
25587 else
25588 xasm = "%!jmp\t%A0";
25590 output_asm_insn (xasm, &call_op);
25591 return "";
25594 /* SEH unwinding can require an extra nop to be emitted in several
25595 circumstances. Determine if we have one of those. */
25596 if (TARGET_SEH)
25598 rtx_insn *i;
25600 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25602 /* If we get to another real insn, we don't need the nop. */
25603 if (INSN_P (i))
25604 break;
25606 /* If we get to the epilogue note, prevent a catch region from
25607 being adjacent to the standard epilogue sequence. If non-
25608 call-exceptions, we'll have done this during epilogue emission. */
25609 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25610 && !flag_non_call_exceptions
25611 && !can_throw_internal (insn))
25613 seh_nop_p = true;
25614 break;
25618 /* If we didn't find a real insn following the call, prevent the
25619 unwinder from looking into the next function. */
25620 if (i == NULL)
25621 seh_nop_p = true;
25624 if (direct_p)
25625 xasm = "%!call\t%P0";
25626 else
25627 xasm = "%!call\t%A0";
25629 output_asm_insn (xasm, &call_op);
25631 if (seh_nop_p)
25632 return "nop";
25634 return "";
25637 /* Clear stack slot assignments remembered from previous functions.
25638 This is called from INIT_EXPANDERS once before RTL is emitted for each
25639 function. */
25641 static struct machine_function *
25642 ix86_init_machine_status (void)
25644 struct machine_function *f;
25646 f = ggc_cleared_alloc<machine_function> ();
25647 f->use_fast_prologue_epilogue_nregs = -1;
25648 f->call_abi = ix86_abi;
25650 return f;
25653 /* Return a MEM corresponding to a stack slot with mode MODE.
25654 Allocate a new slot if necessary.
25656 The RTL for a function can have several slots available: N is
25657 which slot to use. */
25660 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25662 struct stack_local_entry *s;
25664 gcc_assert (n < MAX_386_STACK_LOCALS);
25666 for (s = ix86_stack_locals; s; s = s->next)
25667 if (s->mode == mode && s->n == n)
25668 return validize_mem (copy_rtx (s->rtl));
25670 s = ggc_alloc<stack_local_entry> ();
25671 s->n = n;
25672 s->mode = mode;
25673 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25675 s->next = ix86_stack_locals;
25676 ix86_stack_locals = s;
25677 return validize_mem (copy_rtx (s->rtl));
25680 static void
25681 ix86_instantiate_decls (void)
25683 struct stack_local_entry *s;
25685 for (s = ix86_stack_locals; s; s = s->next)
25686 if (s->rtl != NULL_RTX)
25687 instantiate_decl_rtl (s->rtl);
25690 /* Check whether x86 address PARTS is a pc-relative address. */
25692 static bool
25693 rip_relative_addr_p (struct ix86_address *parts)
25695 rtx base, index, disp;
25697 base = parts->base;
25698 index = parts->index;
25699 disp = parts->disp;
25701 if (disp && !base && !index)
25703 if (TARGET_64BIT)
25705 rtx symbol = disp;
25707 if (GET_CODE (disp) == CONST)
25708 symbol = XEXP (disp, 0);
25709 if (GET_CODE (symbol) == PLUS
25710 && CONST_INT_P (XEXP (symbol, 1)))
25711 symbol = XEXP (symbol, 0);
25713 if (GET_CODE (symbol) == LABEL_REF
25714 || (GET_CODE (symbol) == SYMBOL_REF
25715 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25716 || (GET_CODE (symbol) == UNSPEC
25717 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25718 || XINT (symbol, 1) == UNSPEC_PCREL
25719 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25720 return true;
25723 return false;
25726 /* Calculate the length of the memory address in the instruction encoding.
25727 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25728 or other prefixes. We never generate addr32 prefix for LEA insn. */
25731 memory_address_length (rtx addr, bool lea)
25733 struct ix86_address parts;
25734 rtx base, index, disp;
25735 int len;
25736 int ok;
25738 if (GET_CODE (addr) == PRE_DEC
25739 || GET_CODE (addr) == POST_INC
25740 || GET_CODE (addr) == PRE_MODIFY
25741 || GET_CODE (addr) == POST_MODIFY)
25742 return 0;
25744 ok = ix86_decompose_address (addr, &parts);
25745 gcc_assert (ok);
25747 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25749 /* If this is not LEA instruction, add the length of addr32 prefix. */
25750 if (TARGET_64BIT && !lea
25751 && (SImode_address_operand (addr, VOIDmode)
25752 || (parts.base && GET_MODE (parts.base) == SImode)
25753 || (parts.index && GET_MODE (parts.index) == SImode)))
25754 len++;
25756 base = parts.base;
25757 index = parts.index;
25758 disp = parts.disp;
25760 if (base && GET_CODE (base) == SUBREG)
25761 base = SUBREG_REG (base);
25762 if (index && GET_CODE (index) == SUBREG)
25763 index = SUBREG_REG (index);
25765 gcc_assert (base == NULL_RTX || REG_P (base));
25766 gcc_assert (index == NULL_RTX || REG_P (index));
25768 /* Rule of thumb:
25769 - esp as the base always wants an index,
25770 - ebp as the base always wants a displacement,
25771 - r12 as the base always wants an index,
25772 - r13 as the base always wants a displacement. */
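/* For example, (%esp)/(%r12) can only be encoded with a SIB byte, and
   mod 00 with ebp/r13 as the base actually means disp32 (or RIP-relative
   in 64-bit mode), so 0(%ebp)/0(%r13) needs an explicit disp8 -- hence
   the extra bytes accounted for below.  */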
25774 /* Register Indirect. */
25775 if (base && !index && !disp)
25777 /* esp (for its index) and ebp (for its displacement) need
25778 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25779 code. */
25780 if (base == arg_pointer_rtx
25781 || base == frame_pointer_rtx
25782 || REGNO (base) == SP_REG
25783 || REGNO (base) == BP_REG
25784 || REGNO (base) == R12_REG
25785 || REGNO (base) == R13_REG)
25786 len++;
25789 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25790 is not disp32, but disp32(%rip), so for disp32
25791 SIB byte is needed, unless print_operand_address
25792 optimizes it into disp32(%rip) or (%rip) is implied
25793 by UNSPEC. */
25794 else if (disp && !base && !index)
25796 len += 4;
25797 if (rip_relative_addr_p (&parts))
25798 len++;
25800 else
25802 /* Find the length of the displacement constant. */
25803 if (disp)
25805 if (base && satisfies_constraint_K (disp))
25806 len += 1;
25807 else
25808 len += 4;
25810 /* ebp always wants a displacement. Similarly r13. */
25811 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25812 len++;
25814 /* An index requires the two-byte modrm form.... */
25815 if (index
25816 /* ...like esp (or r12), which always wants an index. */
25817 || base == arg_pointer_rtx
25818 || base == frame_pointer_rtx
25819 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25820 len++;
25823 return len;
25826 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
25827 is set, expect that the insn has an 8-bit immediate alternative. */
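/* E.g. with SHORTFORM set, an ALU insn with the immediate 100 fits the
   sign-extended 8-bit alternative and counts as 1 byte, while the immediate
   1000 in MODE_SI counts as a full 4-byte immediate.  */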
25829 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25831 int len = 0;
25832 int i;
25833 extract_insn_cached (insn);
25834 for (i = recog_data.n_operands - 1; i >= 0; --i)
25835 if (CONSTANT_P (recog_data.operand[i]))
25837 enum attr_mode mode = get_attr_mode (insn);
25839 gcc_assert (!len);
25840 if (shortform && CONST_INT_P (recog_data.operand[i]))
25842 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25843 switch (mode)
25845 case MODE_QI:
25846 len = 1;
25847 continue;
25848 case MODE_HI:
25849 ival = trunc_int_for_mode (ival, HImode);
25850 break;
25851 case MODE_SI:
25852 ival = trunc_int_for_mode (ival, SImode);
25853 break;
25854 default:
25855 break;
25857 if (IN_RANGE (ival, -128, 127))
25859 len = 1;
25860 continue;
25863 switch (mode)
25865 case MODE_QI:
25866 len = 1;
25867 break;
25868 case MODE_HI:
25869 len = 2;
25870 break;
25871 case MODE_SI:
25872 len = 4;
25873 break;
25874 /* Immediates for DImode instructions are encoded
25875 as 32bit sign extended values. */
25876 case MODE_DI:
25877 len = 4;
25878 break;
25879 default:
25880 fatal_insn ("unknown insn mode", insn);
25883 return len;
25886 /* Compute default value for "length_address" attribute. */
25888 ix86_attr_length_address_default (rtx_insn *insn)
25890 int i;
25892 if (get_attr_type (insn) == TYPE_LEA)
25894 rtx set = PATTERN (insn), addr;
25896 if (GET_CODE (set) == PARALLEL)
25897 set = XVECEXP (set, 0, 0);
25899 gcc_assert (GET_CODE (set) == SET);
25901 addr = SET_SRC (set);
25903 return memory_address_length (addr, true);
25906 extract_insn_cached (insn);
25907 for (i = recog_data.n_operands - 1; i >= 0; --i)
25908 if (MEM_P (recog_data.operand[i]))
25910 constrain_operands_cached (insn, reload_completed);
25911 if (which_alternative != -1)
25913 const char *constraints = recog_data.constraints[i];
25914 int alt = which_alternative;
25916 while (*constraints == '=' || *constraints == '+')
25917 constraints++;
25918 while (alt-- > 0)
25919 while (*constraints++ != ',')
25921 /* Skip ignored operands. */
25922 if (*constraints == 'X')
25923 continue;
25925 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25927 return 0;
25930 /* Compute default value for "length_vex" attribute. It includes
25931 2 or 3 byte VEX prefix and 1 opcode byte. */
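/* E.g. an AVX insn with a 0f-escaped opcode, no VEX.W and no extended
   registers in a memory operand gets the 2-byte VEX prefix (2 + 1 = 3),
   while VEX.W, a non-0f opcode or a DImode general register operand forces
   the 3-byte prefix (3 + 1 = 4), matching the checks below.  */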
25934 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
25935 bool has_vex_w)
25937 int i;
25939 /* Only a 0f opcode can use the 2-byte VEX prefix; the VEX.W bit requires
25940 the 3-byte VEX prefix. */
25941 if (!has_0f_opcode || has_vex_w)
25942 return 3 + 1;
25944 /* We can always use 2 byte VEX prefix in 32bit. */
25945 if (!TARGET_64BIT)
25946 return 2 + 1;
25948 extract_insn_cached (insn);
25950 for (i = recog_data.n_operands - 1; i >= 0; --i)
25951 if (REG_P (recog_data.operand[i]))
25953 /* REX.W bit uses 3 byte VEX prefix. */
25954 if (GET_MODE (recog_data.operand[i]) == DImode
25955 && GENERAL_REG_P (recog_data.operand[i]))
25956 return 3 + 1;
25958 else
25960 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25961 if (MEM_P (recog_data.operand[i])
25962 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25963 return 3 + 1;
25966 return 2 + 1;
25969 /* Return the maximum number of instructions a cpu can issue. */
25971 static int
25972 ix86_issue_rate (void)
25974 switch (ix86_tune)
25976 case PROCESSOR_PENTIUM:
25977 case PROCESSOR_BONNELL:
25978 case PROCESSOR_SILVERMONT:
25979 case PROCESSOR_KNL:
25980 case PROCESSOR_INTEL:
25981 case PROCESSOR_K6:
25982 case PROCESSOR_BTVER2:
25983 case PROCESSOR_PENTIUM4:
25984 case PROCESSOR_NOCONA:
25985 return 2;
25987 case PROCESSOR_PENTIUMPRO:
25988 case PROCESSOR_ATHLON:
25989 case PROCESSOR_K8:
25990 case PROCESSOR_AMDFAM10:
25991 case PROCESSOR_GENERIC:
25992 case PROCESSOR_BTVER1:
25993 return 3;
25995 case PROCESSOR_BDVER1:
25996 case PROCESSOR_BDVER2:
25997 case PROCESSOR_BDVER3:
25998 case PROCESSOR_BDVER4:
25999 case PROCESSOR_CORE2:
26000 case PROCESSOR_NEHALEM:
26001 case PROCESSOR_SANDYBRIDGE:
26002 case PROCESSOR_HASWELL:
26003 return 4;
26005 default:
26006 return 1;
26010 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags set
26011 by DEP_INSN and nothing else that DEP_INSN sets. */
26013 static bool
26014 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
26016 rtx set, set2;
26018 /* Simplify the test for uninteresting insns. */
26019 if (insn_type != TYPE_SETCC
26020 && insn_type != TYPE_ICMOV
26021 && insn_type != TYPE_FCMOV
26022 && insn_type != TYPE_IBR)
26023 return false;
26025 if ((set = single_set (dep_insn)) != 0)
26027 set = SET_DEST (set);
26028 set2 = NULL_RTX;
26030 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26031 && XVECLEN (PATTERN (dep_insn), 0) == 2
26032 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26033 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26035 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26036 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
26038 else
26039 return false;
26041 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26042 return false;
26044 /* This test is true if the dependent insn reads the flags but
26045 not any other potentially set register. */
26046 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26047 return false;
26049 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26050 return false;
26052 return true;
26055 /* Return true iff USE_INSN has a memory address with operands set by
26056 SET_INSN. */
26058 bool
26059 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26061 int i;
26062 extract_insn_cached (use_insn);
26063 for (i = recog_data.n_operands - 1; i >= 0; --i)
26064 if (MEM_P (recog_data.operand[i]))
26066 rtx addr = XEXP (recog_data.operand[i], 0);
26067 return modified_in_p (addr, set_insn) != 0;
26069 return false;
26072 /* Helper function for exact_store_load_dependency.
26073 Return true if addr is found in insn. */
26074 static bool
26075 exact_dependency_1 (rtx addr, rtx insn)
26077 enum rtx_code code;
26078 const char *format_ptr;
26079 int i, j;
26081 code = GET_CODE (insn);
26082 switch (code)
26084 case MEM:
26085 if (rtx_equal_p (addr, insn))
26086 return true;
26087 break;
26088 case REG:
26089 CASE_CONST_ANY:
26090 case SYMBOL_REF:
26091 case CODE_LABEL:
26092 case PC:
26093 case CC0:
26094 case EXPR_LIST:
26095 return false;
26096 default:
26097 break;
26100 format_ptr = GET_RTX_FORMAT (code);
26101 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26103 switch (*format_ptr++)
26105 case 'e':
26106 if (exact_dependency_1 (addr, XEXP (insn, i)))
26107 return true;
26108 break;
26109 case 'E':
26110 for (j = 0; j < XVECLEN (insn, i); j++)
26111 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26112 return true;
26113 break;
26116 return false;
26119 /* Return true if there exists exact dependency for store & load, i.e.
26120 the same memory address is used in them. */
26121 static bool
26122 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26124 rtx set1, set2;
26126 set1 = single_set (store);
26127 if (!set1)
26128 return false;
26129 if (!MEM_P (SET_DEST (set1)))
26130 return false;
26131 set2 = single_set (load);
26132 if (!set2)
26133 return false;
26134 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26135 return true;
26136 return false;
26139 static int
26140 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26142 enum attr_type insn_type, dep_insn_type;
26143 enum attr_memory memory;
26144 rtx set, set2;
26145 int dep_insn_code_number;
26147 /* Anti and output dependencies have zero cost on all CPUs. */
26148 if (REG_NOTE_KIND (link) != 0)
26149 return 0;
26151 dep_insn_code_number = recog_memoized (dep_insn);
26153 /* If we can't recognize the insns, we can't really do anything. */
26154 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26155 return cost;
26157 insn_type = get_attr_type (insn);
26158 dep_insn_type = get_attr_type (dep_insn);
26160 switch (ix86_tune)
26162 case PROCESSOR_PENTIUM:
26163 /* Address Generation Interlock adds a cycle of latency. */
26164 if (insn_type == TYPE_LEA)
26166 rtx addr = PATTERN (insn);
26168 if (GET_CODE (addr) == PARALLEL)
26169 addr = XVECEXP (addr, 0, 0);
26171 gcc_assert (GET_CODE (addr) == SET);
26173 addr = SET_SRC (addr);
26174 if (modified_in_p (addr, dep_insn))
26175 cost += 1;
26177 else if (ix86_agi_dependent (dep_insn, insn))
26178 cost += 1;
26180 /* ??? Compares pair with jump/setcc. */
26181 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26182 cost = 0;
26184 /* Floating point stores require value to be ready one cycle earlier. */
26185 if (insn_type == TYPE_FMOV
26186 && get_attr_memory (insn) == MEMORY_STORE
26187 && !ix86_agi_dependent (dep_insn, insn))
26188 cost += 1;
26189 break;
26191 case PROCESSOR_PENTIUMPRO:
26192 /* INT->FP conversion is expensive. */
26193 if (get_attr_fp_int_src (dep_insn))
26194 cost += 5;
26196 /* There is one cycle extra latency between an FP op and a store. */
26197 if (insn_type == TYPE_FMOV
26198 && (set = single_set (dep_insn)) != NULL_RTX
26199 && (set2 = single_set (insn)) != NULL_RTX
26200 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26201 && MEM_P (SET_DEST (set2)))
26202 cost += 1;
26204 memory = get_attr_memory (insn);
26206 /* Show ability of reorder buffer to hide latency of load by executing
26207 in parallel with previous instruction in case
26208 previous instruction is not needed to compute the address. */
26209 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26210 && !ix86_agi_dependent (dep_insn, insn))
26212 /* Claim moves to take one cycle, as the core can issue one load
26213 at a time and the next load can start a cycle later. */
26214 if (dep_insn_type == TYPE_IMOV
26215 || dep_insn_type == TYPE_FMOV)
26216 cost = 1;
26217 else if (cost > 1)
26218 cost--;
26220 break;
26222 case PROCESSOR_K6:
26223 /* The esp dependency is resolved before
26224 the instruction is really finished. */
26225 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26226 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26227 return 1;
26229 /* INT->FP conversion is expensive. */
26230 if (get_attr_fp_int_src (dep_insn))
26231 cost += 5;
26233 memory = get_attr_memory (insn);
26235 /* Show ability of reorder buffer to hide latency of load by executing
26236 in parallel with previous instruction in case
26237 previous instruction is not needed to compute the address. */
26238 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26239 && !ix86_agi_dependent (dep_insn, insn))
26241 /* Claim moves to take one cycle, as the core can issue one load
26242 at a time and the next load can start a cycle later. */
26243 if (dep_insn_type == TYPE_IMOV
26244 || dep_insn_type == TYPE_FMOV)
26245 cost = 1;
26246 else if (cost > 2)
26247 cost -= 2;
26248 else
26249 cost = 1;
26251 break;
26253 case PROCESSOR_AMDFAM10:
26254 case PROCESSOR_BDVER1:
26255 case PROCESSOR_BDVER2:
26256 case PROCESSOR_BDVER3:
26257 case PROCESSOR_BDVER4:
26258 case PROCESSOR_BTVER1:
26259 case PROCESSOR_BTVER2:
26260 case PROCESSOR_GENERIC:
26261 /* The stack engine allows push&pop instructions to execute in parallel. */
26262 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26263 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26264 return 0;
26265 /* FALLTHRU */
26267 case PROCESSOR_ATHLON:
26268 case PROCESSOR_K8:
26269 memory = get_attr_memory (insn);
26271 /* Show ability of reorder buffer to hide latency of load by executing
26272 in parallel with previous instruction in case
26273 previous instruction is not needed to compute the address. */
26274 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26275 && !ix86_agi_dependent (dep_insn, insn))
26277 enum attr_unit unit = get_attr_unit (insn);
26278 int loadcost = 3;
26280 /* Because of the difference between the length of integer and
26281 floating unit pipeline preparation stages, the memory operands
26282 for floating point are cheaper.
26284 ??? For Athlon the difference is most probably 2. */
26285 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26286 loadcost = 3;
26287 else
26288 loadcost = TARGET_ATHLON ? 2 : 0;
26290 if (cost >= loadcost)
26291 cost -= loadcost;
26292 else
26293 cost = 0;
26295 break;
26297 case PROCESSOR_CORE2:
26298 case PROCESSOR_NEHALEM:
26299 case PROCESSOR_SANDYBRIDGE:
26300 case PROCESSOR_HASWELL:
26301 /* The stack engine allows push&pop instructions to execute in parallel. */
26302 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26303 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26304 return 0;
26306 memory = get_attr_memory (insn);
26308 /* Show ability of reorder buffer to hide latency of load by executing
26309 in parallel with previous instruction in case
26310 previous instruction is not needed to compute the address. */
26311 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26312 && !ix86_agi_dependent (dep_insn, insn))
26314 if (cost >= 4)
26315 cost -= 4;
26316 else
26317 cost = 0;
26319 break;
26321 case PROCESSOR_SILVERMONT:
26322 case PROCESSOR_KNL:
26323 case PROCESSOR_INTEL:
26324 if (!reload_completed)
26325 return cost;
26327 /* Increase cost of integer loads. */
26328 memory = get_attr_memory (dep_insn);
26329 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26331 enum attr_unit unit = get_attr_unit (dep_insn);
26332 if (unit == UNIT_INTEGER && cost == 1)
26334 if (memory == MEMORY_LOAD)
26335 cost = 3;
26336 else
26338 /* Increase cost of ld/st for short int types only
26339 because of store forwarding issue. */
26340 rtx set = single_set (dep_insn);
26341 if (set && (GET_MODE (SET_DEST (set)) == QImode
26342 || GET_MODE (SET_DEST (set)) == HImode))
26344 /* Increase the cost of the store/load pair if an exact
26345 dependence exists and this insn is the load. */
26346 enum attr_memory insn_memory = get_attr_memory (insn);
26347 if (insn_memory == MEMORY_LOAD
26348 && exact_store_load_dependency (dep_insn, insn))
26349 cost = 3;
26355 default:
26356 break;
26359 return cost;
26362 /* How many alternative schedules to try. This should be as wide as the
26363 scheduling freedom in the DFA, but no wider. Making this value too
26364 large results in extra work for the scheduler. */
26366 static int
26367 ia32_multipass_dfa_lookahead (void)
26369 switch (ix86_tune)
26371 case PROCESSOR_PENTIUM:
26372 return 2;
26374 case PROCESSOR_PENTIUMPRO:
26375 case PROCESSOR_K6:
26376 return 1;
26378 case PROCESSOR_BDVER1:
26379 case PROCESSOR_BDVER2:
26380 case PROCESSOR_BDVER3:
26381 case PROCESSOR_BDVER4:
26382 /* We use lookahead value 4 for BD both before and after reload
26383 schedules. Plan is to have value 8 included for O3. */
26384 return 4;
26386 case PROCESSOR_CORE2:
26387 case PROCESSOR_NEHALEM:
26388 case PROCESSOR_SANDYBRIDGE:
26389 case PROCESSOR_HASWELL:
26390 case PROCESSOR_BONNELL:
26391 case PROCESSOR_SILVERMONT:
26392 case PROCESSOR_KNL:
26393 case PROCESSOR_INTEL:
26394 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26395 as the number of instructions that can be executed in one cycle, i.e.,
26396 issue_rate. I wonder why tuning for many CPUs does not do this. */
26397 if (reload_completed)
26398 return ix86_issue_rate ();
26399 /* Don't use lookahead for pre-reload schedule to save compile time. */
26400 return 0;
26402 default:
26403 return 0;
26407 /* Return true if target platform supports macro-fusion. */
26409 static bool
26410 ix86_macro_fusion_p ()
26412 return TARGET_FUSE_CMP_AND_BRANCH;
26415 /* Check whether the current microarchitecture supports macro fusion
26416 for the insn pair "CONDGEN + CONDJMP". Refer to the
26417 "Intel Architectures Optimization Reference Manual". */
26419 static bool
26420 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26422 rtx src, dest;
26423 enum rtx_code ccode;
26424 rtx compare_set = NULL_RTX, test_if, cond;
26425 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26427 if (!any_condjump_p (condjmp))
26428 return false;
26430 if (get_attr_type (condgen) != TYPE_TEST
26431 && get_attr_type (condgen) != TYPE_ICMP
26432 && get_attr_type (condgen) != TYPE_INCDEC
26433 && get_attr_type (condgen) != TYPE_ALU)
26434 return false;
26436 compare_set = single_set (condgen);
26437 if (compare_set == NULL_RTX
26438 && !TARGET_FUSE_ALU_AND_BRANCH)
26439 return false;
26441 if (compare_set == NULL_RTX)
26443 int i;
26444 rtx pat = PATTERN (condgen);
26445 for (i = 0; i < XVECLEN (pat, 0); i++)
26446 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26448 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26449 if (GET_CODE (set_src) == COMPARE)
26450 compare_set = XVECEXP (pat, 0, i);
26451 else
26452 alu_set = XVECEXP (pat, 0, i);
26455 if (compare_set == NULL_RTX)
26456 return false;
26457 src = SET_SRC (compare_set);
26458 if (GET_CODE (src) != COMPARE)
26459 return false;
26461 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26462 supported. */
26463 if ((MEM_P (XEXP (src, 0))
26464 && CONST_INT_P (XEXP (src, 1)))
26465 || (MEM_P (XEXP (src, 1))
26466 && CONST_INT_P (XEXP (src, 0))))
26467 return false;
26469 /* No fusion for RIP-relative address. */
26470 if (MEM_P (XEXP (src, 0)))
26471 addr = XEXP (XEXP (src, 0), 0);
26472 else if (MEM_P (XEXP (src, 1)))
26473 addr = XEXP (XEXP (src, 1), 0);
26475 if (addr) {
26476 ix86_address parts;
26477 int ok = ix86_decompose_address (addr, &parts);
26478 gcc_assert (ok);
26480 if (rip_relative_addr_p (&parts))
26481 return false;
26484 test_if = SET_SRC (pc_set (condjmp));
26485 cond = XEXP (test_if, 0);
26486 ccode = GET_CODE (cond);
26487 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26488 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26489 && (ccode == GE
26490 || ccode == GT
26491 || ccode == LE
26492 || ccode == LT))
26493 return false;
26495 /* Return true for TYPE_TEST and TYPE_ICMP. */
26496 if (get_attr_type (condgen) == TYPE_TEST
26497 || get_attr_type (condgen) == TYPE_ICMP)
26498 return true;
26500 /* The following handles the case of macro-fusion for alu + jmp. */
26501 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26502 return false;
26504 /* No fusion for alu op with memory destination operand. */
26505 dest = SET_DEST (alu_set);
26506 if (MEM_P (dest))
26507 return false;
26509 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26510 supported. */
26511 if (get_attr_type (condgen) == TYPE_INCDEC
26512 && (ccode == GEU
26513 || ccode == GTU
26514 || ccode == LEU
26515 || ccode == LTU))
26516 return false;
26518 return true;
26521 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
26522 execution. It is applied if
26523 (1) an IMUL instruction is at the top of the list;
26524 (2) there is exactly one producer of an independent IMUL instruction
26525 in the ready list.
26526 Return the index of the IMUL producer if it was found, and -1 otherwise. */
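/* Editor's illustrative note (not part of the original source): moving the
   producer of the second, independent IMUL to the top of the ready list
   means its result is available right after the already-ready IMUL issues,
   so consecutive IMULs can overlap in Bonnell's pipelined multiplier
   instead of stalling on their inputs.  */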
26527 static int
26528 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26530 rtx_insn *insn;
26531 rtx set, insn1, insn2;
26532 sd_iterator_def sd_it;
26533 dep_t dep;
26534 int index = -1;
26535 int i;
26537 if (!TARGET_BONNELL)
26538 return index;
26540 /* Check that IMUL instruction is on the top of ready list. */
26541 insn = ready[n_ready - 1];
26542 set = single_set (insn);
26543 if (!set)
26544 return index;
26545 if (!(GET_CODE (SET_SRC (set)) == MULT
26546 && GET_MODE (SET_SRC (set)) == SImode))
26547 return index;
26549 /* Search for producer of independent IMUL instruction. */
26550 for (i = n_ready - 2; i >= 0; i--)
26552 insn = ready[i];
26553 if (!NONDEBUG_INSN_P (insn))
26554 continue;
26555 /* Skip IMUL instruction. */
26556 insn2 = PATTERN (insn);
26557 if (GET_CODE (insn2) == PARALLEL)
26558 insn2 = XVECEXP (insn2, 0, 0);
26559 if (GET_CODE (insn2) == SET
26560 && GET_CODE (SET_SRC (insn2)) == MULT
26561 && GET_MODE (SET_SRC (insn2)) == SImode)
26562 continue;
26564 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26566 rtx con;
26567 con = DEP_CON (dep);
26568 if (!NONDEBUG_INSN_P (con))
26569 continue;
26570 insn1 = PATTERN (con);
26571 if (GET_CODE (insn1) == PARALLEL)
26572 insn1 = XVECEXP (insn1, 0, 0);
26574 if (GET_CODE (insn1) == SET
26575 && GET_CODE (SET_SRC (insn1)) == MULT
26576 && GET_MODE (SET_SRC (insn1)) == SImode)
26578 sd_iterator_def sd_it1;
26579 dep_t dep1;
26580 /* Check if there is no other dependee for IMUL. */
26581 index = i;
26582 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26584 rtx pro;
26585 pro = DEP_PRO (dep1);
26586 if (!NONDEBUG_INSN_P (pro))
26587 continue;
26588 if (pro != insn)
26589 index = -1;
26591 if (index >= 0)
26592 break;
26595 if (index >= 0)
26596 break;
26598 return index;
26601 /* Try to find the best candidate for the top of the ready list if two insns
26602 have the same priority - a candidate is better if its dependees were
26603 scheduled earlier. Applied for Silvermont only.
26604 Return true if the top 2 insns must be interchanged. */
26605 static bool
26606 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26608 rtx_insn *top = ready[n_ready - 1];
26609 rtx_insn *next = ready[n_ready - 2];
26610 rtx set;
26611 sd_iterator_def sd_it;
26612 dep_t dep;
26613 int clock1 = -1;
26614 int clock2 = -1;
26615 #define INSN_TICK(INSN) (HID (INSN)->tick)
26617 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26618 return false;
26620 if (!NONDEBUG_INSN_P (top))
26621 return false;
26622 if (!NONJUMP_INSN_P (top))
26623 return false;
26624 if (!NONDEBUG_INSN_P (next))
26625 return false;
26626 if (!NONJUMP_INSN_P (next))
26627 return false;
26628 set = single_set (top);
26629 if (!set)
26630 return false;
26631 set = single_set (next);
26632 if (!set)
26633 return false;
26635 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26637 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26638 return false;
26639 /* Determine the winner more precisely. */
26640 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26642 rtx pro;
26643 pro = DEP_PRO (dep);
26644 if (!NONDEBUG_INSN_P (pro))
26645 continue;
26646 if (INSN_TICK (pro) > clock1)
26647 clock1 = INSN_TICK (pro);
26649 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26651 rtx pro;
26652 pro = DEP_PRO (dep);
26653 if (!NONDEBUG_INSN_P (pro))
26654 continue;
26655 if (INSN_TICK (pro) > clock2)
26656 clock2 = INSN_TICK (pro);
26659 if (clock1 == clock2)
26661 /* Determine winner - load must win. */
26662 enum attr_memory memory1, memory2;
26663 memory1 = get_attr_memory (top);
26664 memory2 = get_attr_memory (next);
26665 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26666 return true;
26668 return (bool) (clock2 < clock1);
26670 return false;
26671 #undef INSN_TICK
26674 /* Perform possible reordering of the ready list, for Atom/Silvermont only.
26675 Return issue rate. */
26676 static int
26677 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26678 int *pn_ready, int clock_var)
26680 int issue_rate = -1;
26681 int n_ready = *pn_ready;
26682 int i;
26683 rtx_insn *insn;
26684 int index = -1;
26686 /* Set up issue rate. */
26687 issue_rate = ix86_issue_rate ();
26689 /* Do reordering for BONNELL/SILVERMONT only. */
26690 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26691 return issue_rate;
26693 /* Nothing to do if ready list contains only 1 instruction. */
26694 if (n_ready <= 1)
26695 return issue_rate;
26697 /* Do reordering for the post-reload scheduler only. */
26698 if (!reload_completed)
26699 return issue_rate;
26701 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26703 if (sched_verbose > 1)
26704 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26705 INSN_UID (ready[index]));
26707 /* Put IMUL producer (ready[index]) at the top of ready list. */
26708 insn = ready[index];
26709 for (i = index; i < n_ready - 1; i++)
26710 ready[i] = ready[i + 1];
26711 ready[n_ready - 1] = insn;
26712 return issue_rate;
26715 /* Skip selective scheduling since HID is not populated in it. */
26716 if (clock_var != 0
26717 && !sel_sched_p ()
26718 && swap_top_of_ready_list (ready, n_ready))
26720 if (sched_verbose > 1)
26721 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26722 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26723 /* Swap 2 top elements of ready list. */
26724 insn = ready[n_ready - 1];
26725 ready[n_ready - 1] = ready[n_ready - 2];
26726 ready[n_ready - 2] = insn;
26728 return issue_rate;
26731 static bool
26732 ix86_class_likely_spilled_p (reg_class_t);
26734 /* Return true if the LHS of INSN is a HW function argument register; set
26735 IS_SPILLED to true if it is a likely-spilled HW register. */
26736 static bool
26737 insn_is_function_arg (rtx insn, bool* is_spilled)
26739 rtx dst;
26741 if (!NONDEBUG_INSN_P (insn))
26742 return false;
26743 /* Call instructions are not movable; ignore them. */
26744 if (CALL_P (insn))
26745 return false;
26746 insn = PATTERN (insn);
26747 if (GET_CODE (insn) == PARALLEL)
26748 insn = XVECEXP (insn, 0, 0);
26749 if (GET_CODE (insn) != SET)
26750 return false;
26751 dst = SET_DEST (insn);
26752 if (REG_P (dst) && HARD_REGISTER_P (dst)
26753 && ix86_function_arg_regno_p (REGNO (dst)))
26755 /* Is it likely spilled HW register? */
26756 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26757 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26758 *is_spilled = true;
26759 return true;
26761 return false;
26764 /* Add output dependencies for a chain of adjacent function arguments, but
26765 only if there is a move to a likely-spilled HW register. Return the first
26766 argument if at least one dependence was added, or NULL otherwise. */
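/* Editor's illustrative note (not part of the original source): for a call
   sequence such as

       movl $1, %edi
       movl $2, %esi
       call foo

   the argument registers belong to single-register classes that are likely
   to be spilled, so output dependencies are added along the chain of
   argument moves to keep the scheduler from moving them away from the
   call.  */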
26767 static rtx_insn *
26768 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26770 rtx_insn *insn;
26771 rtx_insn *last = call;
26772 rtx_insn *first_arg = NULL;
26773 bool is_spilled = false;
26775 head = PREV_INSN (head);
26777 /* Find the argument-passing instruction nearest to the call. */
26778 while (true)
26780 last = PREV_INSN (last);
26781 if (last == head)
26782 return NULL;
26783 if (!NONDEBUG_INSN_P (last))
26784 continue;
26785 if (insn_is_function_arg (last, &is_spilled))
26786 break;
26787 return NULL;
26790 first_arg = last;
26791 while (true)
26793 insn = PREV_INSN (last);
26794 if (!INSN_P (insn))
26795 break;
26796 if (insn == head)
26797 break;
26798 if (!NONDEBUG_INSN_P (insn))
26800 last = insn;
26801 continue;
26803 if (insn_is_function_arg (insn, &is_spilled))
26805 /* Add an output dependence between two function arguments if the chain
26806 of output arguments contains likely-spilled HW registers. */
26807 if (is_spilled)
26808 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26809 first_arg = last = insn;
26811 else
26812 break;
26814 if (!is_spilled)
26815 return NULL;
26816 return first_arg;
26819 /* Add output or anti dependency from insn to first_arg to restrict its code
26820 motion. */
26821 static void
26822 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26824 rtx set;
26825 rtx tmp;
26827 /* Add anti dependencies for bounds stores. */
26828 if (INSN_P (insn)
26829 && GET_CODE (PATTERN (insn)) == PARALLEL
26830 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
26831 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
26833 add_dependence (first_arg, insn, REG_DEP_ANTI);
26834 return;
26837 set = single_set (insn);
26838 if (!set)
26839 return;
26840 tmp = SET_DEST (set);
26841 if (REG_P (tmp))
26843 /* Add output dependency to the first function argument. */
26844 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26845 return;
26847 /* Add anti dependency. */
26848 add_dependence (first_arg, insn, REG_DEP_ANTI);
26851 /* Avoid cross-block motion of a function argument by adding a dependency
26852 from the first non-jump instruction in bb. */
26853 static void
26854 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26856 rtx_insn *insn = BB_END (bb);
26858 while (insn)
26860 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26862 rtx set = single_set (insn);
26863 if (set)
26865 avoid_func_arg_motion (arg, insn);
26866 return;
26869 if (insn == BB_HEAD (bb))
26870 return;
26871 insn = PREV_INSN (insn);
26875 /* Hook for pre-reload schedule - avoid motion of function arguments
26876 passed in likely spilled HW registers. */
26877 static void
26878 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26880 rtx_insn *insn;
26881 rtx_insn *first_arg = NULL;
26882 if (reload_completed)
26883 return;
26884 while (head != tail && DEBUG_INSN_P (head))
26885 head = NEXT_INSN (head);
26886 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26887 if (INSN_P (insn) && CALL_P (insn))
26889 first_arg = add_parameter_dependencies (insn, head);
26890 if (first_arg)
26892 /* Add a dependee for the first argument to predecessors, but only
26893 if the region contains more than one block. */
26894 basic_block bb = BLOCK_FOR_INSN (insn);
26895 int rgn = CONTAINING_RGN (bb->index);
26896 int nr_blks = RGN_NR_BLOCKS (rgn);
26897 /* Skip trivial regions and region head blocks that can have
26898 predecessors outside of region. */
26899 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26901 edge e;
26902 edge_iterator ei;
26904 /* Regions are SCCs with the exception of selective
26905 scheduling with pipelining of outer blocks enabled.
26906 So also check that immediate predecessors of a non-head
26907 block are in the same region. */
26908 FOR_EACH_EDGE (e, ei, bb->preds)
26910 /* Avoid creating loop-carried dependencies by using
26911 the topological ordering of the region. */
26912 if (rgn == CONTAINING_RGN (e->src->index)
26913 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26914 add_dependee_for_func_arg (first_arg, e->src);
26917 insn = first_arg;
26918 if (insn == head)
26919 break;
26922 else if (first_arg)
26923 avoid_func_arg_motion (first_arg, insn);
26926 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26927 HW registers to maximum, to schedule them as soon as possible. These are
26928 moves from function argument registers at the top of the function entry
26929 and moves from function return value registers after call. */
26930 static int
26931 ix86_adjust_priority (rtx_insn *insn, int priority)
26933 rtx set;
26935 if (reload_completed)
26936 return priority;
26938 if (!NONDEBUG_INSN_P (insn))
26939 return priority;
26941 set = single_set (insn);
26942 if (set)
26944 rtx tmp = SET_SRC (set);
26945 if (REG_P (tmp)
26946 && HARD_REGISTER_P (tmp)
26947 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26948 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26949 return current_sched_info->sched_max_insns_priority;
26952 return priority;
26955 /* Model decoder of Core 2/i7.
26956 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
26957 track the instruction fetch block boundaries and make sure that long
26958 (9+ byte) instructions are assigned to D0. */
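/* Editor's worked example (not part of the original source), using the
   parameters set below (16-byte ifetch block, at most 6 insns per block,
   8-byte limit for secondary decoders): once two 7-byte insns have been
   issued in the current block (14 bytes), a further 3-byte insn no longer
   fits in the 16-byte block and is filtered out of ready_try for this
   cycle; a 9-byte insn exceeds the secondary-decoder limit and is only
   considered as the first insn of a cycle (decoder D0).  */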
26960 /* Maximum length of an insn that can be handled by
26961 a secondary decoder unit. '8' for Core 2/i7. */
26962 static int core2i7_secondary_decoder_max_insn_size;
26964 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26965 '16' for Core 2/i7. */
26966 static int core2i7_ifetch_block_size;
26968 /* Maximum number of instructions decoder can handle per cycle.
26969 '6' for Core 2/i7. */
26970 static int core2i7_ifetch_block_max_insns;
26972 typedef struct ix86_first_cycle_multipass_data_ *
26973 ix86_first_cycle_multipass_data_t;
26974 typedef const struct ix86_first_cycle_multipass_data_ *
26975 const_ix86_first_cycle_multipass_data_t;
26977 /* A variable to store target state across calls to max_issue within
26978 one cycle. */
26979 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26980 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26982 /* Initialize DATA. */
26983 static void
26984 core2i7_first_cycle_multipass_init (void *_data)
26986 ix86_first_cycle_multipass_data_t data
26987 = (ix86_first_cycle_multipass_data_t) _data;
26989 data->ifetch_block_len = 0;
26990 data->ifetch_block_n_insns = 0;
26991 data->ready_try_change = NULL;
26992 data->ready_try_change_size = 0;
26995 /* Advancing the cycle; reset ifetch block counts. */
26996 static void
26997 core2i7_dfa_post_advance_cycle (void)
26999 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
27001 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27003 data->ifetch_block_len = 0;
27004 data->ifetch_block_n_insns = 0;
27007 static int min_insn_size (rtx_insn *);
27009 /* Filter out insns from ready_try that the core will not be able to issue
27010 on current cycle due to decoder. */
27011 static void
27012 core2i7_first_cycle_multipass_filter_ready_try
27013 (const_ix86_first_cycle_multipass_data_t data,
27014 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
27016 while (n_ready--)
27018 rtx_insn *insn;
27019 int insn_size;
27021 if (ready_try[n_ready])
27022 continue;
27024 insn = get_ready_element (n_ready);
27025 insn_size = min_insn_size (insn);
27027 if (/* If this is too long an insn for a secondary decoder ... */
27028 (!first_cycle_insn_p
27029 && insn_size > core2i7_secondary_decoder_max_insn_size)
27030 /* ... or it would not fit into the ifetch block ... */
27031 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
27032 /* ... or the decoder is full already ... */
27033 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
27034 /* ... mask the insn out. */
27036 ready_try[n_ready] = 1;
27038 if (data->ready_try_change)
27039 bitmap_set_bit (data->ready_try_change, n_ready);
27044 /* Prepare for a new round of multipass lookahead scheduling. */
27045 static void
27046 core2i7_first_cycle_multipass_begin (void *_data,
27047 signed char *ready_try, int n_ready,
27048 bool first_cycle_insn_p)
27050 ix86_first_cycle_multipass_data_t data
27051 = (ix86_first_cycle_multipass_data_t) _data;
27052 const_ix86_first_cycle_multipass_data_t prev_data
27053 = ix86_first_cycle_multipass_data;
27055 /* Restore the state from the end of the previous round. */
27056 data->ifetch_block_len = prev_data->ifetch_block_len;
27057 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27059 /* Filter instructions that cannot be issued on current cycle due to
27060 decoder restrictions. */
27061 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27062 first_cycle_insn_p);
27065 /* INSN is being issued in current solution. Account for its impact on
27066 the decoder model. */
27067 static void
27068 core2i7_first_cycle_multipass_issue (void *_data,
27069 signed char *ready_try, int n_ready,
27070 rtx_insn *insn, const void *_prev_data)
27072 ix86_first_cycle_multipass_data_t data
27073 = (ix86_first_cycle_multipass_data_t) _data;
27074 const_ix86_first_cycle_multipass_data_t prev_data
27075 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27077 int insn_size = min_insn_size (insn);
27079 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27080 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27081 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27082 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27084 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27085 if (!data->ready_try_change)
27087 data->ready_try_change = sbitmap_alloc (n_ready);
27088 data->ready_try_change_size = n_ready;
27090 else if (data->ready_try_change_size < n_ready)
27092 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27093 n_ready, 0);
27094 data->ready_try_change_size = n_ready;
27096 bitmap_clear (data->ready_try_change);
27098 /* Filter out insns from ready_try that the core will not be able to issue
27099 on current cycle due to decoder. */
27100 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27101 false);
27104 /* Revert the effect on ready_try. */
27105 static void
27106 core2i7_first_cycle_multipass_backtrack (const void *_data,
27107 signed char *ready_try,
27108 int n_ready ATTRIBUTE_UNUSED)
27110 const_ix86_first_cycle_multipass_data_t data
27111 = (const_ix86_first_cycle_multipass_data_t) _data;
27112 unsigned int i = 0;
27113 sbitmap_iterator sbi;
27115 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27116 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27118 ready_try[i] = 0;
27122 /* Save the result of multipass lookahead scheduling for the next round. */
27123 static void
27124 core2i7_first_cycle_multipass_end (const void *_data)
27126 const_ix86_first_cycle_multipass_data_t data
27127 = (const_ix86_first_cycle_multipass_data_t) _data;
27128 ix86_first_cycle_multipass_data_t next_data
27129 = ix86_first_cycle_multipass_data;
27131 if (data != NULL)
27133 next_data->ifetch_block_len = data->ifetch_block_len;
27134 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27138 /* Deallocate target data. */
27139 static void
27140 core2i7_first_cycle_multipass_fini (void *_data)
27142 ix86_first_cycle_multipass_data_t data
27143 = (ix86_first_cycle_multipass_data_t) _data;
27145 if (data->ready_try_change)
27147 sbitmap_free (data->ready_try_change);
27148 data->ready_try_change = NULL;
27149 data->ready_try_change_size = 0;
27153 /* Prepare for scheduling pass. */
27154 static void
27155 ix86_sched_init_global (FILE *, int, int)
27157 /* Install scheduling hooks for current CPU. Some of these hooks are used
27158 in time-critical parts of the scheduler, so we only set them up when
27159 they are actually used. */
27160 switch (ix86_tune)
27162 case PROCESSOR_CORE2:
27163 case PROCESSOR_NEHALEM:
27164 case PROCESSOR_SANDYBRIDGE:
27165 case PROCESSOR_HASWELL:
27166 /* Do not perform multipass scheduling for pre-reload schedule
27167 to save compile time. */
27168 if (reload_completed)
27170 targetm.sched.dfa_post_advance_cycle
27171 = core2i7_dfa_post_advance_cycle;
27172 targetm.sched.first_cycle_multipass_init
27173 = core2i7_first_cycle_multipass_init;
27174 targetm.sched.first_cycle_multipass_begin
27175 = core2i7_first_cycle_multipass_begin;
27176 targetm.sched.first_cycle_multipass_issue
27177 = core2i7_first_cycle_multipass_issue;
27178 targetm.sched.first_cycle_multipass_backtrack
27179 = core2i7_first_cycle_multipass_backtrack;
27180 targetm.sched.first_cycle_multipass_end
27181 = core2i7_first_cycle_multipass_end;
27182 targetm.sched.first_cycle_multipass_fini
27183 = core2i7_first_cycle_multipass_fini;
27185 /* Set decoder parameters. */
27186 core2i7_secondary_decoder_max_insn_size = 8;
27187 core2i7_ifetch_block_size = 16;
27188 core2i7_ifetch_block_max_insns = 6;
27189 break;
27191 /* ... Fall through ... */
27192 default:
27193 targetm.sched.dfa_post_advance_cycle = NULL;
27194 targetm.sched.first_cycle_multipass_init = NULL;
27195 targetm.sched.first_cycle_multipass_begin = NULL;
27196 targetm.sched.first_cycle_multipass_issue = NULL;
27197 targetm.sched.first_cycle_multipass_backtrack = NULL;
27198 targetm.sched.first_cycle_multipass_end = NULL;
27199 targetm.sched.first_cycle_multipass_fini = NULL;
27200 break;
27205 /* Compute the alignment given to a constant that is being placed in memory.
27206 EXP is the constant and ALIGN is the alignment that the object would
27207 ordinarily have.
27208 The value of this function is used instead of that alignment to align
27209 the object. */
27211 int
27212 ix86_constant_alignment (tree exp, int align)
27214 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27215 || TREE_CODE (exp) == INTEGER_CST)
27217 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27218 return 64;
27219 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27220 return 128;
27222 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27223 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27224 return BITS_PER_WORD;
27226 return align;
27229 /* Compute the alignment for a static variable.
27230 TYPE is the data type, and ALIGN is the alignment that
27231 the object would ordinarily have. The value of this function is used
27232 instead of that alignment to align the object. */
27234 int
27235 ix86_data_alignment (tree type, int align, bool opt)
27237 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27238 for symbols from other compilation units or symbols that don't need
27239 to bind locally. In order to preserve some ABI compatibility with
27240 those compilers, ensure we don't decrease alignment from what we
27241 used to assume. */
27243 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27245 /* A data structure equal to or greater than the size of a cache line
27246 (64 bytes in the Pentium 4 and other recent Intel processors, including
27247 processors based on the Intel Core microarchitecture) should be aligned
27248 so that its base address is a multiple of the cache line size. */
27250 int max_align
27251 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27253 if (max_align < BITS_PER_WORD)
27254 max_align = BITS_PER_WORD;
27256 switch (ix86_align_data_type)
27258 case ix86_align_data_type_abi: opt = false; break;
27259 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27260 case ix86_align_data_type_cacheline: break;
27263 if (opt
27264 && AGGREGATE_TYPE_P (type)
27265 && TYPE_SIZE (type)
27266 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27268 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27269 && align < max_align_compat)
27270 align = max_align_compat;
27271 if (wi::geu_p (TYPE_SIZE (type), max_align)
27272 && align < max_align)
27273 align = max_align;
27276 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27277 to a 16-byte boundary. */
27278 if (TARGET_64BIT)
27280 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27281 && TYPE_SIZE (type)
27282 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27283 && wi::geu_p (TYPE_SIZE (type), 128)
27284 && align < 128)
27285 return 128;
27288 if (!opt)
27289 return align;
27291 if (TREE_CODE (type) == ARRAY_TYPE)
27293 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27294 return 64;
27295 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27296 return 128;
27298 else if (TREE_CODE (type) == COMPLEX_TYPE)
27301 if (TYPE_MODE (type) == DCmode && align < 64)
27302 return 64;
27303 if ((TYPE_MODE (type) == XCmode
27304 || TYPE_MODE (type) == TCmode) && align < 128)
27305 return 128;
27307 else if ((TREE_CODE (type) == RECORD_TYPE
27308 || TREE_CODE (type) == UNION_TYPE
27309 || TREE_CODE (type) == QUAL_UNION_TYPE)
27310 && TYPE_FIELDS (type))
27312 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27313 return 64;
27314 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27315 return 128;
27317 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27318 || TREE_CODE (type) == INTEGER_TYPE)
27320 if (TYPE_MODE (type) == DFmode && align < 64)
27321 return 64;
27322 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27323 return 128;
27326 return align;
27329 /* Compute the alignment for a local variable or a stack slot. EXP is
27330 the data type or decl itself, MODE is the widest mode available and
27331 ALIGN is the alignment that the object would ordinarily have. The
27332 value of this macro is used instead of that alignment to align the
27333 object. */
27335 unsigned int
27336 ix86_local_alignment (tree exp, machine_mode mode,
27337 unsigned int align)
27339 tree type, decl;
27341 if (exp && DECL_P (exp))
27343 type = TREE_TYPE (exp);
27344 decl = exp;
27346 else
27348 type = exp;
27349 decl = NULL;
27352 /* Don't do dynamic stack realignment for long long objects with
27353 -mpreferred-stack-boundary=2. */
27354 if (!TARGET_64BIT
27355 && align == 64
27356 && ix86_preferred_stack_boundary < 64
27357 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27358 && (!type || !TYPE_USER_ALIGN (type))
27359 && (!decl || !DECL_USER_ALIGN (decl)))
27360 align = 32;
27362 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
27363 register in MODE. We will return the larger alignment of XF
27364 and DF. */
27365 if (!type)
27367 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27368 align = GET_MODE_ALIGNMENT (DFmode);
27369 return align;
27372 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27373 to a 16-byte boundary. The exact wording is:
27375 An array uses the same alignment as its elements, except that a local or
27376 global array variable of length at least 16 bytes or
27377 a C99 variable-length array variable always has alignment of at least 16 bytes.
27379 This was added to allow the use of aligned SSE instructions on arrays. The
27380 rule is meant for static storage (where the compiler cannot do the analysis
27381 by itself). We follow it for automatic variables only when convenient.
27382 We fully control everything in the function being compiled, and functions
27383 from other units cannot rely on the alignment.
27385 Exclude the va_list type. It is the common case of a local array where
27386 we cannot benefit from the alignment.
27388 TODO: Probably one should optimize for size only when the variable does not escape. */
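/* Editor's illustrative note (not part of the original source): under this
   rule a local "double buf[4]" (32 bytes) is given 128-bit alignment when
   compiling for speed with SSE enabled, so aligned vector loads and stores
   can be used on it.  */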
27389 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27390 && TARGET_SSE)
27392 if (AGGREGATE_TYPE_P (type)
27393 && (va_list_type_node == NULL_TREE
27394 || (TYPE_MAIN_VARIANT (type)
27395 != TYPE_MAIN_VARIANT (va_list_type_node)))
27396 && TYPE_SIZE (type)
27397 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27398 && wi::geu_p (TYPE_SIZE (type), 16)
27399 && align < 128)
27400 return 128;
27402 if (TREE_CODE (type) == ARRAY_TYPE)
27404 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27405 return 64;
27406 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27407 return 128;
27409 else if (TREE_CODE (type) == COMPLEX_TYPE)
27411 if (TYPE_MODE (type) == DCmode && align < 64)
27412 return 64;
27413 if ((TYPE_MODE (type) == XCmode
27414 || TYPE_MODE (type) == TCmode) && align < 128)
27415 return 128;
27417 else if ((TREE_CODE (type) == RECORD_TYPE
27418 || TREE_CODE (type) == UNION_TYPE
27419 || TREE_CODE (type) == QUAL_UNION_TYPE)
27420 && TYPE_FIELDS (type))
27422 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27423 return 64;
27424 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27425 return 128;
27427 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27428 || TREE_CODE (type) == INTEGER_TYPE)
27431 if (TYPE_MODE (type) == DFmode && align < 64)
27432 return 64;
27433 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27434 return 128;
27436 return align;
27439 /* Compute the minimum required alignment for dynamic stack realignment
27440 purposes for a local variable, parameter or a stack slot. EXP is
27441 the data type or decl itself, MODE is its mode and ALIGN is the
27442 alignment that the object would ordinarily have. */
27444 unsigned int
27445 ix86_minimum_alignment (tree exp, machine_mode mode,
27446 unsigned int align)
27448 tree type, decl;
27450 if (exp && DECL_P (exp))
27452 type = TREE_TYPE (exp);
27453 decl = exp;
27455 else
27457 type = exp;
27458 decl = NULL;
27461 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27462 return align;
27464 /* Don't do dynamic stack realignment for long long objects with
27465 -mpreferred-stack-boundary=2. */
27466 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27467 && (!type || !TYPE_USER_ALIGN (type))
27468 && (!decl || !DECL_USER_ALIGN (decl)))
27469 return 32;
27471 return align;
27474 /* Find a location for the static chain incoming to a nested function.
27475 This is a register, unless all free registers are used by arguments. */
27477 static rtx
27478 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27480 unsigned regno;
27482 /* While this function won't be called by the middle-end when a static
27483 chain isn't needed, it's also used throughout the backend so it's
27484 easiest to keep this check centralized. */
27485 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27486 return NULL;
27488 if (TARGET_64BIT)
27490 /* We always use R10 in 64-bit mode. */
27491 regno = R10_REG;
27493 else
27495 const_tree fntype, fndecl;
27496 unsigned int ccvt;
27498 /* By default in 32-bit mode we use ECX to pass the static chain. */
27499 regno = CX_REG;
27501 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27503 fntype = TREE_TYPE (fndecl_or_type);
27504 fndecl = fndecl_or_type;
27506 else
27508 fntype = fndecl_or_type;
27509 fndecl = NULL;
27512 ccvt = ix86_get_callcvt (fntype);
27513 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27515 /* Fastcall functions use ecx/edx for arguments, which leaves
27516 us with EAX for the static chain.
27517 Thiscall functions use ecx for arguments, which also
27518 leaves us with EAX for the static chain. */
27519 regno = AX_REG;
27521 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27523 /* Thiscall functions use ecx for arguments, which leaves
27524 us with EAX and EDX for the static chain.
27525 We use EAX for ABI compatibility. */
27526 regno = AX_REG;
27528 else if (ix86_function_regparm (fntype, fndecl) == 3)
27530 /* For regparm 3, we have no free call-clobbered registers in
27531 which to store the static chain. In order to implement this,
27532 we have the trampoline push the static chain to the stack.
27533 However, we can't push a value below the return address when
27534 we call the nested function directly, so we have to use an
27535 alternate entry point. For this we use ESI, and have the
27536 alternate entry point push ESI, so that things appear the
27537 same once we're executing the nested function. */
27538 if (incoming_p)
27540 if (fndecl == current_function_decl)
27541 ix86_static_chain_on_stack = true;
27542 return gen_frame_mem (SImode,
27543 plus_constant (Pmode,
27544 arg_pointer_rtx, -8));
27546 regno = SI_REG;
27550 return gen_rtx_REG (Pmode, regno);
27553 /* Emit RTL insns to initialize the variable parts of a trampoline.
27554 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27555 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27556 to be passed to the target function. */
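/* Editor's illustrative note (not part of the original source): for the
   64-bit movabs path below, the bytes emitted amount to

       49 bb <imm64>    movabsq $fnaddr, %r11
       49 ba <imm64>    movabsq $chain,  %r10
       49 ff e3 90      jmpq *%r11 ; nop (pads the store to 32 bits)

   while the 32-bit variant loads or pushes the static chain and finishes
   with a relative jmp to the target.  */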
27558 static void
27559 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27561 rtx mem, fnaddr;
27562 int opcode;
27563 int offset = 0;
27565 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27567 if (TARGET_64BIT)
27569 int size;
27571 /* Load the function address into r11. Try to load the address using
27572 the shorter movl instead of movabs. We may want to support
27573 movq for kernel mode, but the kernel does not use trampolines at
27574 the moment. FNADDR is a 32-bit address and may not be in
27575 DImode when ptr_mode == SImode. Always use movl in this
27576 case. */
27577 if (ptr_mode == SImode
27578 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27580 fnaddr = copy_addr_to_reg (fnaddr);
27582 mem = adjust_address (m_tramp, HImode, offset);
27583 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27585 mem = adjust_address (m_tramp, SImode, offset + 2);
27586 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27587 offset += 6;
27589 else
27591 mem = adjust_address (m_tramp, HImode, offset);
27592 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27594 mem = adjust_address (m_tramp, DImode, offset + 2);
27595 emit_move_insn (mem, fnaddr);
27596 offset += 10;
27599 /* Load static chain using movabs to r10. Use the shorter movl
27600 instead of movabs when ptr_mode == SImode. */
27601 if (ptr_mode == SImode)
27603 opcode = 0xba41;
27604 size = 6;
27606 else
27608 opcode = 0xba49;
27609 size = 10;
27612 mem = adjust_address (m_tramp, HImode, offset);
27613 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27615 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27616 emit_move_insn (mem, chain_value);
27617 offset += size;
27619 /* Jump to r11; the last (unused) byte is a nop, only there to
27620 pad the write out to a single 32-bit store. */
27621 mem = adjust_address (m_tramp, SImode, offset);
27622 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27623 offset += 4;
27625 else
27627 rtx disp, chain;
27629 /* Depending on the static chain location, either load a register
27630 with a constant, or push the constant to the stack. All of the
27631 instructions are the same size. */
27632 chain = ix86_static_chain (fndecl, true);
27633 if (REG_P (chain))
27635 switch (REGNO (chain))
27637 case AX_REG:
27638 opcode = 0xb8; break;
27639 case CX_REG:
27640 opcode = 0xb9; break;
27641 default:
27642 gcc_unreachable ();
27645 else
27646 opcode = 0x68;
27648 mem = adjust_address (m_tramp, QImode, offset);
27649 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27651 mem = adjust_address (m_tramp, SImode, offset + 1);
27652 emit_move_insn (mem, chain_value);
27653 offset += 5;
27655 mem = adjust_address (m_tramp, QImode, offset);
27656 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27658 mem = adjust_address (m_tramp, SImode, offset + 1);
27660 /* Compute offset from the end of the jmp to the target function.
27661 In the case in which the trampoline stores the static chain on
27662 the stack, we need to skip the first insn which pushes the
27663 (call-saved) register static chain; this push is 1 byte. */
27664 offset += 5;
27665 disp = expand_binop (SImode, sub_optab, fnaddr,
27666 plus_constant (Pmode, XEXP (m_tramp, 0),
27667 offset - (MEM_P (chain) ? 1 : 0)),
27668 NULL_RTX, 1, OPTAB_DIRECT);
27669 emit_move_insn (mem, disp);
27672 gcc_assert (offset <= TRAMPOLINE_SIZE);
27674 #ifdef HAVE_ENABLE_EXECUTE_STACK
27675 #ifdef CHECK_EXECUTE_STACK_ENABLED
27676 if (CHECK_EXECUTE_STACK_ENABLED)
27677 #endif
27678 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27679 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27680 #endif
27683 /* The following file contains several enumerations and data structures
27684 built from the definitions in i386-builtin-types.def. */
27686 #include "i386-builtin-types.inc"
27688 /* Table for the ix86 builtin non-function types. */
27689 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27691 /* Retrieve an element from the above table, building some of
27692 the types lazily. */
27694 static tree
27695 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27697 unsigned int index;
27698 tree type, itype;
27700 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27702 type = ix86_builtin_type_tab[(int) tcode];
27703 if (type != NULL)
27704 return type;
27706 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27707 if (tcode <= IX86_BT_LAST_VECT)
27709 machine_mode mode;
27711 index = tcode - IX86_BT_LAST_PRIM - 1;
27712 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27713 mode = ix86_builtin_type_vect_mode[index];
27715 type = build_vector_type_for_mode (itype, mode);
27717 else
27719 int quals;
27721 index = tcode - IX86_BT_LAST_VECT - 1;
27722 if (tcode <= IX86_BT_LAST_PTR)
27723 quals = TYPE_UNQUALIFIED;
27724 else
27725 quals = TYPE_QUAL_CONST;
27727 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27728 if (quals != TYPE_UNQUALIFIED)
27729 itype = build_qualified_type (itype, quals);
27731 type = build_pointer_type (itype);
27734 ix86_builtin_type_tab[(int) tcode] = type;
27735 return type;
27738 /* Table for the ix86 builtin function types. */
27739 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27741 /* Retrieve an element from the above table, building some of
27742 the types lazily. */
27744 static tree
27745 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27747 tree type;
27749 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27751 type = ix86_builtin_func_type_tab[(int) tcode];
27752 if (type != NULL)
27753 return type;
27755 if (tcode <= IX86_BT_LAST_FUNC)
27757 unsigned start = ix86_builtin_func_start[(int) tcode];
27758 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27759 tree rtype, atype, args = void_list_node;
27760 unsigned i;
27762 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27763 for (i = after - 1; i > start; --i)
27765 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27766 args = tree_cons (NULL, atype, args);
27769 type = build_function_type (rtype, args);
27771 else
27773 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27774 enum ix86_builtin_func_type icode;
27776 icode = ix86_builtin_func_alias_base[index];
27777 type = ix86_get_builtin_func_type (icode);
27780 ix86_builtin_func_type_tab[(int) tcode] = type;
27781 return type;
27785 /* Codes for all the SSE/MMX builtins. */
27786 enum ix86_builtins
27788 IX86_BUILTIN_ADDPS,
27789 IX86_BUILTIN_ADDSS,
27790 IX86_BUILTIN_DIVPS,
27791 IX86_BUILTIN_DIVSS,
27792 IX86_BUILTIN_MULPS,
27793 IX86_BUILTIN_MULSS,
27794 IX86_BUILTIN_SUBPS,
27795 IX86_BUILTIN_SUBSS,
27797 IX86_BUILTIN_CMPEQPS,
27798 IX86_BUILTIN_CMPLTPS,
27799 IX86_BUILTIN_CMPLEPS,
27800 IX86_BUILTIN_CMPGTPS,
27801 IX86_BUILTIN_CMPGEPS,
27802 IX86_BUILTIN_CMPNEQPS,
27803 IX86_BUILTIN_CMPNLTPS,
27804 IX86_BUILTIN_CMPNLEPS,
27805 IX86_BUILTIN_CMPNGTPS,
27806 IX86_BUILTIN_CMPNGEPS,
27807 IX86_BUILTIN_CMPORDPS,
27808 IX86_BUILTIN_CMPUNORDPS,
27809 IX86_BUILTIN_CMPEQSS,
27810 IX86_BUILTIN_CMPLTSS,
27811 IX86_BUILTIN_CMPLESS,
27812 IX86_BUILTIN_CMPNEQSS,
27813 IX86_BUILTIN_CMPNLTSS,
27814 IX86_BUILTIN_CMPNLESS,
27815 IX86_BUILTIN_CMPORDSS,
27816 IX86_BUILTIN_CMPUNORDSS,
27818 IX86_BUILTIN_COMIEQSS,
27819 IX86_BUILTIN_COMILTSS,
27820 IX86_BUILTIN_COMILESS,
27821 IX86_BUILTIN_COMIGTSS,
27822 IX86_BUILTIN_COMIGESS,
27823 IX86_BUILTIN_COMINEQSS,
27824 IX86_BUILTIN_UCOMIEQSS,
27825 IX86_BUILTIN_UCOMILTSS,
27826 IX86_BUILTIN_UCOMILESS,
27827 IX86_BUILTIN_UCOMIGTSS,
27828 IX86_BUILTIN_UCOMIGESS,
27829 IX86_BUILTIN_UCOMINEQSS,
27831 IX86_BUILTIN_CVTPI2PS,
27832 IX86_BUILTIN_CVTPS2PI,
27833 IX86_BUILTIN_CVTSI2SS,
27834 IX86_BUILTIN_CVTSI642SS,
27835 IX86_BUILTIN_CVTSS2SI,
27836 IX86_BUILTIN_CVTSS2SI64,
27837 IX86_BUILTIN_CVTTPS2PI,
27838 IX86_BUILTIN_CVTTSS2SI,
27839 IX86_BUILTIN_CVTTSS2SI64,
27841 IX86_BUILTIN_MAXPS,
27842 IX86_BUILTIN_MAXSS,
27843 IX86_BUILTIN_MINPS,
27844 IX86_BUILTIN_MINSS,
27846 IX86_BUILTIN_LOADUPS,
27847 IX86_BUILTIN_STOREUPS,
27848 IX86_BUILTIN_MOVSS,
27850 IX86_BUILTIN_MOVHLPS,
27851 IX86_BUILTIN_MOVLHPS,
27852 IX86_BUILTIN_LOADHPS,
27853 IX86_BUILTIN_LOADLPS,
27854 IX86_BUILTIN_STOREHPS,
27855 IX86_BUILTIN_STORELPS,
27857 IX86_BUILTIN_MASKMOVQ,
27858 IX86_BUILTIN_MOVMSKPS,
27859 IX86_BUILTIN_PMOVMSKB,
27861 IX86_BUILTIN_MOVNTPS,
27862 IX86_BUILTIN_MOVNTQ,
27864 IX86_BUILTIN_LOADDQU,
27865 IX86_BUILTIN_STOREDQU,
27867 IX86_BUILTIN_PACKSSWB,
27868 IX86_BUILTIN_PACKSSDW,
27869 IX86_BUILTIN_PACKUSWB,
27871 IX86_BUILTIN_PADDB,
27872 IX86_BUILTIN_PADDW,
27873 IX86_BUILTIN_PADDD,
27874 IX86_BUILTIN_PADDQ,
27875 IX86_BUILTIN_PADDSB,
27876 IX86_BUILTIN_PADDSW,
27877 IX86_BUILTIN_PADDUSB,
27878 IX86_BUILTIN_PADDUSW,
27879 IX86_BUILTIN_PSUBB,
27880 IX86_BUILTIN_PSUBW,
27881 IX86_BUILTIN_PSUBD,
27882 IX86_BUILTIN_PSUBQ,
27883 IX86_BUILTIN_PSUBSB,
27884 IX86_BUILTIN_PSUBSW,
27885 IX86_BUILTIN_PSUBUSB,
27886 IX86_BUILTIN_PSUBUSW,
27888 IX86_BUILTIN_PAND,
27889 IX86_BUILTIN_PANDN,
27890 IX86_BUILTIN_POR,
27891 IX86_BUILTIN_PXOR,
27893 IX86_BUILTIN_PAVGB,
27894 IX86_BUILTIN_PAVGW,
27896 IX86_BUILTIN_PCMPEQB,
27897 IX86_BUILTIN_PCMPEQW,
27898 IX86_BUILTIN_PCMPEQD,
27899 IX86_BUILTIN_PCMPGTB,
27900 IX86_BUILTIN_PCMPGTW,
27901 IX86_BUILTIN_PCMPGTD,
27903 IX86_BUILTIN_PMADDWD,
27905 IX86_BUILTIN_PMAXSW,
27906 IX86_BUILTIN_PMAXUB,
27907 IX86_BUILTIN_PMINSW,
27908 IX86_BUILTIN_PMINUB,
27910 IX86_BUILTIN_PMULHUW,
27911 IX86_BUILTIN_PMULHW,
27912 IX86_BUILTIN_PMULLW,
27914 IX86_BUILTIN_PSADBW,
27915 IX86_BUILTIN_PSHUFW,
27917 IX86_BUILTIN_PSLLW,
27918 IX86_BUILTIN_PSLLD,
27919 IX86_BUILTIN_PSLLQ,
27920 IX86_BUILTIN_PSRAW,
27921 IX86_BUILTIN_PSRAD,
27922 IX86_BUILTIN_PSRLW,
27923 IX86_BUILTIN_PSRLD,
27924 IX86_BUILTIN_PSRLQ,
27925 IX86_BUILTIN_PSLLWI,
27926 IX86_BUILTIN_PSLLDI,
27927 IX86_BUILTIN_PSLLQI,
27928 IX86_BUILTIN_PSRAWI,
27929 IX86_BUILTIN_PSRADI,
27930 IX86_BUILTIN_PSRLWI,
27931 IX86_BUILTIN_PSRLDI,
27932 IX86_BUILTIN_PSRLQI,
27934 IX86_BUILTIN_PUNPCKHBW,
27935 IX86_BUILTIN_PUNPCKHWD,
27936 IX86_BUILTIN_PUNPCKHDQ,
27937 IX86_BUILTIN_PUNPCKLBW,
27938 IX86_BUILTIN_PUNPCKLWD,
27939 IX86_BUILTIN_PUNPCKLDQ,
27941 IX86_BUILTIN_SHUFPS,
27943 IX86_BUILTIN_RCPPS,
27944 IX86_BUILTIN_RCPSS,
27945 IX86_BUILTIN_RSQRTPS,
27946 IX86_BUILTIN_RSQRTPS_NR,
27947 IX86_BUILTIN_RSQRTSS,
27948 IX86_BUILTIN_RSQRTF,
27949 IX86_BUILTIN_SQRTPS,
27950 IX86_BUILTIN_SQRTPS_NR,
27951 IX86_BUILTIN_SQRTSS,
27953 IX86_BUILTIN_UNPCKHPS,
27954 IX86_BUILTIN_UNPCKLPS,
27956 IX86_BUILTIN_ANDPS,
27957 IX86_BUILTIN_ANDNPS,
27958 IX86_BUILTIN_ORPS,
27959 IX86_BUILTIN_XORPS,
27961 IX86_BUILTIN_EMMS,
27962 IX86_BUILTIN_LDMXCSR,
27963 IX86_BUILTIN_STMXCSR,
27964 IX86_BUILTIN_SFENCE,
27966 IX86_BUILTIN_FXSAVE,
27967 IX86_BUILTIN_FXRSTOR,
27968 IX86_BUILTIN_FXSAVE64,
27969 IX86_BUILTIN_FXRSTOR64,
27971 IX86_BUILTIN_XSAVE,
27972 IX86_BUILTIN_XRSTOR,
27973 IX86_BUILTIN_XSAVE64,
27974 IX86_BUILTIN_XRSTOR64,
27976 IX86_BUILTIN_XSAVEOPT,
27977 IX86_BUILTIN_XSAVEOPT64,
27979 IX86_BUILTIN_XSAVEC,
27980 IX86_BUILTIN_XSAVEC64,
27982 IX86_BUILTIN_XSAVES,
27983 IX86_BUILTIN_XRSTORS,
27984 IX86_BUILTIN_XSAVES64,
27985 IX86_BUILTIN_XRSTORS64,
27987 /* 3DNow! Original */
27988 IX86_BUILTIN_FEMMS,
27989 IX86_BUILTIN_PAVGUSB,
27990 IX86_BUILTIN_PF2ID,
27991 IX86_BUILTIN_PFACC,
27992 IX86_BUILTIN_PFADD,
27993 IX86_BUILTIN_PFCMPEQ,
27994 IX86_BUILTIN_PFCMPGE,
27995 IX86_BUILTIN_PFCMPGT,
27996 IX86_BUILTIN_PFMAX,
27997 IX86_BUILTIN_PFMIN,
27998 IX86_BUILTIN_PFMUL,
27999 IX86_BUILTIN_PFRCP,
28000 IX86_BUILTIN_PFRCPIT1,
28001 IX86_BUILTIN_PFRCPIT2,
28002 IX86_BUILTIN_PFRSQIT1,
28003 IX86_BUILTIN_PFRSQRT,
28004 IX86_BUILTIN_PFSUB,
28005 IX86_BUILTIN_PFSUBR,
28006 IX86_BUILTIN_PI2FD,
28007 IX86_BUILTIN_PMULHRW,
28009 /* 3DNow! Athlon Extensions */
28010 IX86_BUILTIN_PF2IW,
28011 IX86_BUILTIN_PFNACC,
28012 IX86_BUILTIN_PFPNACC,
28013 IX86_BUILTIN_PI2FW,
28014 IX86_BUILTIN_PSWAPDSI,
28015 IX86_BUILTIN_PSWAPDSF,
28017 /* SSE2 */
28018 IX86_BUILTIN_ADDPD,
28019 IX86_BUILTIN_ADDSD,
28020 IX86_BUILTIN_DIVPD,
28021 IX86_BUILTIN_DIVSD,
28022 IX86_BUILTIN_MULPD,
28023 IX86_BUILTIN_MULSD,
28024 IX86_BUILTIN_SUBPD,
28025 IX86_BUILTIN_SUBSD,
28027 IX86_BUILTIN_CMPEQPD,
28028 IX86_BUILTIN_CMPLTPD,
28029 IX86_BUILTIN_CMPLEPD,
28030 IX86_BUILTIN_CMPGTPD,
28031 IX86_BUILTIN_CMPGEPD,
28032 IX86_BUILTIN_CMPNEQPD,
28033 IX86_BUILTIN_CMPNLTPD,
28034 IX86_BUILTIN_CMPNLEPD,
28035 IX86_BUILTIN_CMPNGTPD,
28036 IX86_BUILTIN_CMPNGEPD,
28037 IX86_BUILTIN_CMPORDPD,
28038 IX86_BUILTIN_CMPUNORDPD,
28039 IX86_BUILTIN_CMPEQSD,
28040 IX86_BUILTIN_CMPLTSD,
28041 IX86_BUILTIN_CMPLESD,
28042 IX86_BUILTIN_CMPNEQSD,
28043 IX86_BUILTIN_CMPNLTSD,
28044 IX86_BUILTIN_CMPNLESD,
28045 IX86_BUILTIN_CMPORDSD,
28046 IX86_BUILTIN_CMPUNORDSD,
28048 IX86_BUILTIN_COMIEQSD,
28049 IX86_BUILTIN_COMILTSD,
28050 IX86_BUILTIN_COMILESD,
28051 IX86_BUILTIN_COMIGTSD,
28052 IX86_BUILTIN_COMIGESD,
28053 IX86_BUILTIN_COMINEQSD,
28054 IX86_BUILTIN_UCOMIEQSD,
28055 IX86_BUILTIN_UCOMILTSD,
28056 IX86_BUILTIN_UCOMILESD,
28057 IX86_BUILTIN_UCOMIGTSD,
28058 IX86_BUILTIN_UCOMIGESD,
28059 IX86_BUILTIN_UCOMINEQSD,
28061 IX86_BUILTIN_MAXPD,
28062 IX86_BUILTIN_MAXSD,
28063 IX86_BUILTIN_MINPD,
28064 IX86_BUILTIN_MINSD,
28066 IX86_BUILTIN_ANDPD,
28067 IX86_BUILTIN_ANDNPD,
28068 IX86_BUILTIN_ORPD,
28069 IX86_BUILTIN_XORPD,
28071 IX86_BUILTIN_SQRTPD,
28072 IX86_BUILTIN_SQRTSD,
28074 IX86_BUILTIN_UNPCKHPD,
28075 IX86_BUILTIN_UNPCKLPD,
28077 IX86_BUILTIN_SHUFPD,
28079 IX86_BUILTIN_LOADUPD,
28080 IX86_BUILTIN_STOREUPD,
28081 IX86_BUILTIN_MOVSD,
28083 IX86_BUILTIN_LOADHPD,
28084 IX86_BUILTIN_LOADLPD,
28086 IX86_BUILTIN_CVTDQ2PD,
28087 IX86_BUILTIN_CVTDQ2PS,
28089 IX86_BUILTIN_CVTPD2DQ,
28090 IX86_BUILTIN_CVTPD2PI,
28091 IX86_BUILTIN_CVTPD2PS,
28092 IX86_BUILTIN_CVTTPD2DQ,
28093 IX86_BUILTIN_CVTTPD2PI,
28095 IX86_BUILTIN_CVTPI2PD,
28096 IX86_BUILTIN_CVTSI2SD,
28097 IX86_BUILTIN_CVTSI642SD,
28099 IX86_BUILTIN_CVTSD2SI,
28100 IX86_BUILTIN_CVTSD2SI64,
28101 IX86_BUILTIN_CVTSD2SS,
28102 IX86_BUILTIN_CVTSS2SD,
28103 IX86_BUILTIN_CVTTSD2SI,
28104 IX86_BUILTIN_CVTTSD2SI64,
28106 IX86_BUILTIN_CVTPS2DQ,
28107 IX86_BUILTIN_CVTPS2PD,
28108 IX86_BUILTIN_CVTTPS2DQ,
28110 IX86_BUILTIN_MOVNTI,
28111 IX86_BUILTIN_MOVNTI64,
28112 IX86_BUILTIN_MOVNTPD,
28113 IX86_BUILTIN_MOVNTDQ,
28115 IX86_BUILTIN_MOVQ128,
28117 /* SSE2 MMX */
28118 IX86_BUILTIN_MASKMOVDQU,
28119 IX86_BUILTIN_MOVMSKPD,
28120 IX86_BUILTIN_PMOVMSKB128,
28122 IX86_BUILTIN_PACKSSWB128,
28123 IX86_BUILTIN_PACKSSDW128,
28124 IX86_BUILTIN_PACKUSWB128,
28126 IX86_BUILTIN_PADDB128,
28127 IX86_BUILTIN_PADDW128,
28128 IX86_BUILTIN_PADDD128,
28129 IX86_BUILTIN_PADDQ128,
28130 IX86_BUILTIN_PADDSB128,
28131 IX86_BUILTIN_PADDSW128,
28132 IX86_BUILTIN_PADDUSB128,
28133 IX86_BUILTIN_PADDUSW128,
28134 IX86_BUILTIN_PSUBB128,
28135 IX86_BUILTIN_PSUBW128,
28136 IX86_BUILTIN_PSUBD128,
28137 IX86_BUILTIN_PSUBQ128,
28138 IX86_BUILTIN_PSUBSB128,
28139 IX86_BUILTIN_PSUBSW128,
28140 IX86_BUILTIN_PSUBUSB128,
28141 IX86_BUILTIN_PSUBUSW128,
28143 IX86_BUILTIN_PAND128,
28144 IX86_BUILTIN_PANDN128,
28145 IX86_BUILTIN_POR128,
28146 IX86_BUILTIN_PXOR128,
28148 IX86_BUILTIN_PAVGB128,
28149 IX86_BUILTIN_PAVGW128,
28151 IX86_BUILTIN_PCMPEQB128,
28152 IX86_BUILTIN_PCMPEQW128,
28153 IX86_BUILTIN_PCMPEQD128,
28154 IX86_BUILTIN_PCMPGTB128,
28155 IX86_BUILTIN_PCMPGTW128,
28156 IX86_BUILTIN_PCMPGTD128,
28158 IX86_BUILTIN_PMADDWD128,
28160 IX86_BUILTIN_PMAXSW128,
28161 IX86_BUILTIN_PMAXUB128,
28162 IX86_BUILTIN_PMINSW128,
28163 IX86_BUILTIN_PMINUB128,
28165 IX86_BUILTIN_PMULUDQ,
28166 IX86_BUILTIN_PMULUDQ128,
28167 IX86_BUILTIN_PMULHUW128,
28168 IX86_BUILTIN_PMULHW128,
28169 IX86_BUILTIN_PMULLW128,
28171 IX86_BUILTIN_PSADBW128,
28172 IX86_BUILTIN_PSHUFHW,
28173 IX86_BUILTIN_PSHUFLW,
28174 IX86_BUILTIN_PSHUFD,
28176 IX86_BUILTIN_PSLLDQI128,
28177 IX86_BUILTIN_PSLLWI128,
28178 IX86_BUILTIN_PSLLDI128,
28179 IX86_BUILTIN_PSLLQI128,
28180 IX86_BUILTIN_PSRAWI128,
28181 IX86_BUILTIN_PSRADI128,
28182 IX86_BUILTIN_PSRLDQI128,
28183 IX86_BUILTIN_PSRLWI128,
28184 IX86_BUILTIN_PSRLDI128,
28185 IX86_BUILTIN_PSRLQI128,
28187 IX86_BUILTIN_PSLLDQ128,
28188 IX86_BUILTIN_PSLLW128,
28189 IX86_BUILTIN_PSLLD128,
28190 IX86_BUILTIN_PSLLQ128,
28191 IX86_BUILTIN_PSRAW128,
28192 IX86_BUILTIN_PSRAD128,
28193 IX86_BUILTIN_PSRLW128,
28194 IX86_BUILTIN_PSRLD128,
28195 IX86_BUILTIN_PSRLQ128,
28197 IX86_BUILTIN_PUNPCKHBW128,
28198 IX86_BUILTIN_PUNPCKHWD128,
28199 IX86_BUILTIN_PUNPCKHDQ128,
28200 IX86_BUILTIN_PUNPCKHQDQ128,
28201 IX86_BUILTIN_PUNPCKLBW128,
28202 IX86_BUILTIN_PUNPCKLWD128,
28203 IX86_BUILTIN_PUNPCKLDQ128,
28204 IX86_BUILTIN_PUNPCKLQDQ128,
28206 IX86_BUILTIN_CLFLUSH,
28207 IX86_BUILTIN_MFENCE,
28208 IX86_BUILTIN_LFENCE,
28209 IX86_BUILTIN_PAUSE,
28211 IX86_BUILTIN_FNSTENV,
28212 IX86_BUILTIN_FLDENV,
28213 IX86_BUILTIN_FNSTSW,
28214 IX86_BUILTIN_FNCLEX,
28216 IX86_BUILTIN_BSRSI,
28217 IX86_BUILTIN_BSRDI,
28218 IX86_BUILTIN_RDPMC,
28219 IX86_BUILTIN_RDTSC,
28220 IX86_BUILTIN_RDTSCP,
28221 IX86_BUILTIN_ROLQI,
28222 IX86_BUILTIN_ROLHI,
28223 IX86_BUILTIN_RORQI,
28224 IX86_BUILTIN_RORHI,
28226 /* SSE3. */
28227 IX86_BUILTIN_ADDSUBPS,
28228 IX86_BUILTIN_HADDPS,
28229 IX86_BUILTIN_HSUBPS,
28230 IX86_BUILTIN_MOVSHDUP,
28231 IX86_BUILTIN_MOVSLDUP,
28232 IX86_BUILTIN_ADDSUBPD,
28233 IX86_BUILTIN_HADDPD,
28234 IX86_BUILTIN_HSUBPD,
28235 IX86_BUILTIN_LDDQU,
28237 IX86_BUILTIN_MONITOR,
28238 IX86_BUILTIN_MWAIT,
28240 /* SSSE3. */
28241 IX86_BUILTIN_PHADDW,
28242 IX86_BUILTIN_PHADDD,
28243 IX86_BUILTIN_PHADDSW,
28244 IX86_BUILTIN_PHSUBW,
28245 IX86_BUILTIN_PHSUBD,
28246 IX86_BUILTIN_PHSUBSW,
28247 IX86_BUILTIN_PMADDUBSW,
28248 IX86_BUILTIN_PMULHRSW,
28249 IX86_BUILTIN_PSHUFB,
28250 IX86_BUILTIN_PSIGNB,
28251 IX86_BUILTIN_PSIGNW,
28252 IX86_BUILTIN_PSIGND,
28253 IX86_BUILTIN_PALIGNR,
28254 IX86_BUILTIN_PABSB,
28255 IX86_BUILTIN_PABSW,
28256 IX86_BUILTIN_PABSD,
28258 IX86_BUILTIN_PHADDW128,
28259 IX86_BUILTIN_PHADDD128,
28260 IX86_BUILTIN_PHADDSW128,
28261 IX86_BUILTIN_PHSUBW128,
28262 IX86_BUILTIN_PHSUBD128,
28263 IX86_BUILTIN_PHSUBSW128,
28264 IX86_BUILTIN_PMADDUBSW128,
28265 IX86_BUILTIN_PMULHRSW128,
28266 IX86_BUILTIN_PSHUFB128,
28267 IX86_BUILTIN_PSIGNB128,
28268 IX86_BUILTIN_PSIGNW128,
28269 IX86_BUILTIN_PSIGND128,
28270 IX86_BUILTIN_PALIGNR128,
28271 IX86_BUILTIN_PABSB128,
28272 IX86_BUILTIN_PABSW128,
28273 IX86_BUILTIN_PABSD128,
28275 /* AMDFAM10 - SSE4A New Instructions. */
28276 IX86_BUILTIN_MOVNTSD,
28277 IX86_BUILTIN_MOVNTSS,
28278 IX86_BUILTIN_EXTRQI,
28279 IX86_BUILTIN_EXTRQ,
28280 IX86_BUILTIN_INSERTQI,
28281 IX86_BUILTIN_INSERTQ,
28283 /* SSE4.1. */
28284 IX86_BUILTIN_BLENDPD,
28285 IX86_BUILTIN_BLENDPS,
28286 IX86_BUILTIN_BLENDVPD,
28287 IX86_BUILTIN_BLENDVPS,
28288 IX86_BUILTIN_PBLENDVB128,
28289 IX86_BUILTIN_PBLENDW128,
28291 IX86_BUILTIN_DPPD,
28292 IX86_BUILTIN_DPPS,
28294 IX86_BUILTIN_INSERTPS128,
28296 IX86_BUILTIN_MOVNTDQA,
28297 IX86_BUILTIN_MPSADBW128,
28298 IX86_BUILTIN_PACKUSDW128,
28299 IX86_BUILTIN_PCMPEQQ,
28300 IX86_BUILTIN_PHMINPOSUW128,
28302 IX86_BUILTIN_PMAXSB128,
28303 IX86_BUILTIN_PMAXSD128,
28304 IX86_BUILTIN_PMAXUD128,
28305 IX86_BUILTIN_PMAXUW128,
28307 IX86_BUILTIN_PMINSB128,
28308 IX86_BUILTIN_PMINSD128,
28309 IX86_BUILTIN_PMINUD128,
28310 IX86_BUILTIN_PMINUW128,
28312 IX86_BUILTIN_PMOVSXBW128,
28313 IX86_BUILTIN_PMOVSXBD128,
28314 IX86_BUILTIN_PMOVSXBQ128,
28315 IX86_BUILTIN_PMOVSXWD128,
28316 IX86_BUILTIN_PMOVSXWQ128,
28317 IX86_BUILTIN_PMOVSXDQ128,
28319 IX86_BUILTIN_PMOVZXBW128,
28320 IX86_BUILTIN_PMOVZXBD128,
28321 IX86_BUILTIN_PMOVZXBQ128,
28322 IX86_BUILTIN_PMOVZXWD128,
28323 IX86_BUILTIN_PMOVZXWQ128,
28324 IX86_BUILTIN_PMOVZXDQ128,
28326 IX86_BUILTIN_PMULDQ128,
28327 IX86_BUILTIN_PMULLD128,
28329 IX86_BUILTIN_ROUNDSD,
28330 IX86_BUILTIN_ROUNDSS,
28332 IX86_BUILTIN_ROUNDPD,
28333 IX86_BUILTIN_ROUNDPS,
28335 IX86_BUILTIN_FLOORPD,
28336 IX86_BUILTIN_CEILPD,
28337 IX86_BUILTIN_TRUNCPD,
28338 IX86_BUILTIN_RINTPD,
28339 IX86_BUILTIN_ROUNDPD_AZ,
28341 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28342 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28343 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28345 IX86_BUILTIN_FLOORPS,
28346 IX86_BUILTIN_CEILPS,
28347 IX86_BUILTIN_TRUNCPS,
28348 IX86_BUILTIN_RINTPS,
28349 IX86_BUILTIN_ROUNDPS_AZ,
28351 IX86_BUILTIN_FLOORPS_SFIX,
28352 IX86_BUILTIN_CEILPS_SFIX,
28353 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28355 IX86_BUILTIN_PTESTZ,
28356 IX86_BUILTIN_PTESTC,
28357 IX86_BUILTIN_PTESTNZC,
28359 IX86_BUILTIN_VEC_INIT_V2SI,
28360 IX86_BUILTIN_VEC_INIT_V4HI,
28361 IX86_BUILTIN_VEC_INIT_V8QI,
28362 IX86_BUILTIN_VEC_EXT_V2DF,
28363 IX86_BUILTIN_VEC_EXT_V2DI,
28364 IX86_BUILTIN_VEC_EXT_V4SF,
28365 IX86_BUILTIN_VEC_EXT_V4SI,
28366 IX86_BUILTIN_VEC_EXT_V8HI,
28367 IX86_BUILTIN_VEC_EXT_V2SI,
28368 IX86_BUILTIN_VEC_EXT_V4HI,
28369 IX86_BUILTIN_VEC_EXT_V16QI,
28370 IX86_BUILTIN_VEC_SET_V2DI,
28371 IX86_BUILTIN_VEC_SET_V4SF,
28372 IX86_BUILTIN_VEC_SET_V4SI,
28373 IX86_BUILTIN_VEC_SET_V8HI,
28374 IX86_BUILTIN_VEC_SET_V4HI,
28375 IX86_BUILTIN_VEC_SET_V16QI,
28377 IX86_BUILTIN_VEC_PACK_SFIX,
28378 IX86_BUILTIN_VEC_PACK_SFIX256,
28380 /* SSE4.2. */
28381 IX86_BUILTIN_CRC32QI,
28382 IX86_BUILTIN_CRC32HI,
28383 IX86_BUILTIN_CRC32SI,
28384 IX86_BUILTIN_CRC32DI,
28386 IX86_BUILTIN_PCMPESTRI128,
28387 IX86_BUILTIN_PCMPESTRM128,
28388 IX86_BUILTIN_PCMPESTRA128,
28389 IX86_BUILTIN_PCMPESTRC128,
28390 IX86_BUILTIN_PCMPESTRO128,
28391 IX86_BUILTIN_PCMPESTRS128,
28392 IX86_BUILTIN_PCMPESTRZ128,
28393 IX86_BUILTIN_PCMPISTRI128,
28394 IX86_BUILTIN_PCMPISTRM128,
28395 IX86_BUILTIN_PCMPISTRA128,
28396 IX86_BUILTIN_PCMPISTRC128,
28397 IX86_BUILTIN_PCMPISTRO128,
28398 IX86_BUILTIN_PCMPISTRS128,
28399 IX86_BUILTIN_PCMPISTRZ128,
28401 IX86_BUILTIN_PCMPGTQ,
28403 /* AES instructions */
28404 IX86_BUILTIN_AESENC128,
28405 IX86_BUILTIN_AESENCLAST128,
28406 IX86_BUILTIN_AESDEC128,
28407 IX86_BUILTIN_AESDECLAST128,
28408 IX86_BUILTIN_AESIMC128,
28409 IX86_BUILTIN_AESKEYGENASSIST128,
28411 /* PCLMUL instruction */
28412 IX86_BUILTIN_PCLMULQDQ128,
28414 /* AVX */
28415 IX86_BUILTIN_ADDPD256,
28416 IX86_BUILTIN_ADDPS256,
28417 IX86_BUILTIN_ADDSUBPD256,
28418 IX86_BUILTIN_ADDSUBPS256,
28419 IX86_BUILTIN_ANDPD256,
28420 IX86_BUILTIN_ANDPS256,
28421 IX86_BUILTIN_ANDNPD256,
28422 IX86_BUILTIN_ANDNPS256,
28423 IX86_BUILTIN_BLENDPD256,
28424 IX86_BUILTIN_BLENDPS256,
28425 IX86_BUILTIN_BLENDVPD256,
28426 IX86_BUILTIN_BLENDVPS256,
28427 IX86_BUILTIN_DIVPD256,
28428 IX86_BUILTIN_DIVPS256,
28429 IX86_BUILTIN_DPPS256,
28430 IX86_BUILTIN_HADDPD256,
28431 IX86_BUILTIN_HADDPS256,
28432 IX86_BUILTIN_HSUBPD256,
28433 IX86_BUILTIN_HSUBPS256,
28434 IX86_BUILTIN_MAXPD256,
28435 IX86_BUILTIN_MAXPS256,
28436 IX86_BUILTIN_MINPD256,
28437 IX86_BUILTIN_MINPS256,
28438 IX86_BUILTIN_MULPD256,
28439 IX86_BUILTIN_MULPS256,
28440 IX86_BUILTIN_ORPD256,
28441 IX86_BUILTIN_ORPS256,
28442 IX86_BUILTIN_SHUFPD256,
28443 IX86_BUILTIN_SHUFPS256,
28444 IX86_BUILTIN_SUBPD256,
28445 IX86_BUILTIN_SUBPS256,
28446 IX86_BUILTIN_XORPD256,
28447 IX86_BUILTIN_XORPS256,
28448 IX86_BUILTIN_CMPSD,
28449 IX86_BUILTIN_CMPSS,
28450 IX86_BUILTIN_CMPPD,
28451 IX86_BUILTIN_CMPPS,
28452 IX86_BUILTIN_CMPPD256,
28453 IX86_BUILTIN_CMPPS256,
28454 IX86_BUILTIN_CVTDQ2PD256,
28455 IX86_BUILTIN_CVTDQ2PS256,
28456 IX86_BUILTIN_CVTPD2PS256,
28457 IX86_BUILTIN_CVTPS2DQ256,
28458 IX86_BUILTIN_CVTPS2PD256,
28459 IX86_BUILTIN_CVTTPD2DQ256,
28460 IX86_BUILTIN_CVTPD2DQ256,
28461 IX86_BUILTIN_CVTTPS2DQ256,
28462 IX86_BUILTIN_EXTRACTF128PD256,
28463 IX86_BUILTIN_EXTRACTF128PS256,
28464 IX86_BUILTIN_EXTRACTF128SI256,
28465 IX86_BUILTIN_VZEROALL,
28466 IX86_BUILTIN_VZEROUPPER,
28467 IX86_BUILTIN_VPERMILVARPD,
28468 IX86_BUILTIN_VPERMILVARPS,
28469 IX86_BUILTIN_VPERMILVARPD256,
28470 IX86_BUILTIN_VPERMILVARPS256,
28471 IX86_BUILTIN_VPERMILPD,
28472 IX86_BUILTIN_VPERMILPS,
28473 IX86_BUILTIN_VPERMILPD256,
28474 IX86_BUILTIN_VPERMILPS256,
28475 IX86_BUILTIN_VPERMIL2PD,
28476 IX86_BUILTIN_VPERMIL2PS,
28477 IX86_BUILTIN_VPERMIL2PD256,
28478 IX86_BUILTIN_VPERMIL2PS256,
28479 IX86_BUILTIN_VPERM2F128PD256,
28480 IX86_BUILTIN_VPERM2F128PS256,
28481 IX86_BUILTIN_VPERM2F128SI256,
28482 IX86_BUILTIN_VBROADCASTSS,
28483 IX86_BUILTIN_VBROADCASTSD256,
28484 IX86_BUILTIN_VBROADCASTSS256,
28485 IX86_BUILTIN_VBROADCASTPD256,
28486 IX86_BUILTIN_VBROADCASTPS256,
28487 IX86_BUILTIN_VINSERTF128PD256,
28488 IX86_BUILTIN_VINSERTF128PS256,
28489 IX86_BUILTIN_VINSERTF128SI256,
28490 IX86_BUILTIN_LOADUPD256,
28491 IX86_BUILTIN_LOADUPS256,
28492 IX86_BUILTIN_STOREUPD256,
28493 IX86_BUILTIN_STOREUPS256,
28494 IX86_BUILTIN_LDDQU256,
28495 IX86_BUILTIN_MOVNTDQ256,
28496 IX86_BUILTIN_MOVNTPD256,
28497 IX86_BUILTIN_MOVNTPS256,
28498 IX86_BUILTIN_LOADDQU256,
28499 IX86_BUILTIN_STOREDQU256,
28500 IX86_BUILTIN_MASKLOADPD,
28501 IX86_BUILTIN_MASKLOADPS,
28502 IX86_BUILTIN_MASKSTOREPD,
28503 IX86_BUILTIN_MASKSTOREPS,
28504 IX86_BUILTIN_MASKLOADPD256,
28505 IX86_BUILTIN_MASKLOADPS256,
28506 IX86_BUILTIN_MASKSTOREPD256,
28507 IX86_BUILTIN_MASKSTOREPS256,
28508 IX86_BUILTIN_MOVSHDUP256,
28509 IX86_BUILTIN_MOVSLDUP256,
28510 IX86_BUILTIN_MOVDDUP256,
28512 IX86_BUILTIN_SQRTPD256,
28513 IX86_BUILTIN_SQRTPS256,
28514 IX86_BUILTIN_SQRTPS_NR256,
28515 IX86_BUILTIN_RSQRTPS256,
28516 IX86_BUILTIN_RSQRTPS_NR256,
28518 IX86_BUILTIN_RCPPS256,
28520 IX86_BUILTIN_ROUNDPD256,
28521 IX86_BUILTIN_ROUNDPS256,
28523 IX86_BUILTIN_FLOORPD256,
28524 IX86_BUILTIN_CEILPD256,
28525 IX86_BUILTIN_TRUNCPD256,
28526 IX86_BUILTIN_RINTPD256,
28527 IX86_BUILTIN_ROUNDPD_AZ256,
28529 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28530 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28531 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28533 IX86_BUILTIN_FLOORPS256,
28534 IX86_BUILTIN_CEILPS256,
28535 IX86_BUILTIN_TRUNCPS256,
28536 IX86_BUILTIN_RINTPS256,
28537 IX86_BUILTIN_ROUNDPS_AZ256,
28539 IX86_BUILTIN_FLOORPS_SFIX256,
28540 IX86_BUILTIN_CEILPS_SFIX256,
28541 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28543 IX86_BUILTIN_UNPCKHPD256,
28544 IX86_BUILTIN_UNPCKLPD256,
28545 IX86_BUILTIN_UNPCKHPS256,
28546 IX86_BUILTIN_UNPCKLPS256,
28548 IX86_BUILTIN_SI256_SI,
28549 IX86_BUILTIN_PS256_PS,
28550 IX86_BUILTIN_PD256_PD,
28551 IX86_BUILTIN_SI_SI256,
28552 IX86_BUILTIN_PS_PS256,
28553 IX86_BUILTIN_PD_PD256,
28555 IX86_BUILTIN_VTESTZPD,
28556 IX86_BUILTIN_VTESTCPD,
28557 IX86_BUILTIN_VTESTNZCPD,
28558 IX86_BUILTIN_VTESTZPS,
28559 IX86_BUILTIN_VTESTCPS,
28560 IX86_BUILTIN_VTESTNZCPS,
28561 IX86_BUILTIN_VTESTZPD256,
28562 IX86_BUILTIN_VTESTCPD256,
28563 IX86_BUILTIN_VTESTNZCPD256,
28564 IX86_BUILTIN_VTESTZPS256,
28565 IX86_BUILTIN_VTESTCPS256,
28566 IX86_BUILTIN_VTESTNZCPS256,
28567 IX86_BUILTIN_PTESTZ256,
28568 IX86_BUILTIN_PTESTC256,
28569 IX86_BUILTIN_PTESTNZC256,
28571 IX86_BUILTIN_MOVMSKPD256,
28572 IX86_BUILTIN_MOVMSKPS256,
28574 /* AVX2 */
28575 IX86_BUILTIN_MPSADBW256,
28576 IX86_BUILTIN_PABSB256,
28577 IX86_BUILTIN_PABSW256,
28578 IX86_BUILTIN_PABSD256,
28579 IX86_BUILTIN_PACKSSDW256,
28580 IX86_BUILTIN_PACKSSWB256,
28581 IX86_BUILTIN_PACKUSDW256,
28582 IX86_BUILTIN_PACKUSWB256,
28583 IX86_BUILTIN_PADDB256,
28584 IX86_BUILTIN_PADDW256,
28585 IX86_BUILTIN_PADDD256,
28586 IX86_BUILTIN_PADDQ256,
28587 IX86_BUILTIN_PADDSB256,
28588 IX86_BUILTIN_PADDSW256,
28589 IX86_BUILTIN_PADDUSB256,
28590 IX86_BUILTIN_PADDUSW256,
28591 IX86_BUILTIN_PALIGNR256,
28592 IX86_BUILTIN_AND256I,
28593 IX86_BUILTIN_ANDNOT256I,
28594 IX86_BUILTIN_PAVGB256,
28595 IX86_BUILTIN_PAVGW256,
28596 IX86_BUILTIN_PBLENDVB256,
28597 IX86_BUILTIN_PBLENDVW256,
28598 IX86_BUILTIN_PCMPEQB256,
28599 IX86_BUILTIN_PCMPEQW256,
28600 IX86_BUILTIN_PCMPEQD256,
28601 IX86_BUILTIN_PCMPEQQ256,
28602 IX86_BUILTIN_PCMPGTB256,
28603 IX86_BUILTIN_PCMPGTW256,
28604 IX86_BUILTIN_PCMPGTD256,
28605 IX86_BUILTIN_PCMPGTQ256,
28606 IX86_BUILTIN_PHADDW256,
28607 IX86_BUILTIN_PHADDD256,
28608 IX86_BUILTIN_PHADDSW256,
28609 IX86_BUILTIN_PHSUBW256,
28610 IX86_BUILTIN_PHSUBD256,
28611 IX86_BUILTIN_PHSUBSW256,
28612 IX86_BUILTIN_PMADDUBSW256,
28613 IX86_BUILTIN_PMADDWD256,
28614 IX86_BUILTIN_PMAXSB256,
28615 IX86_BUILTIN_PMAXSW256,
28616 IX86_BUILTIN_PMAXSD256,
28617 IX86_BUILTIN_PMAXUB256,
28618 IX86_BUILTIN_PMAXUW256,
28619 IX86_BUILTIN_PMAXUD256,
28620 IX86_BUILTIN_PMINSB256,
28621 IX86_BUILTIN_PMINSW256,
28622 IX86_BUILTIN_PMINSD256,
28623 IX86_BUILTIN_PMINUB256,
28624 IX86_BUILTIN_PMINUW256,
28625 IX86_BUILTIN_PMINUD256,
28626 IX86_BUILTIN_PMOVMSKB256,
28627 IX86_BUILTIN_PMOVSXBW256,
28628 IX86_BUILTIN_PMOVSXBD256,
28629 IX86_BUILTIN_PMOVSXBQ256,
28630 IX86_BUILTIN_PMOVSXWD256,
28631 IX86_BUILTIN_PMOVSXWQ256,
28632 IX86_BUILTIN_PMOVSXDQ256,
28633 IX86_BUILTIN_PMOVZXBW256,
28634 IX86_BUILTIN_PMOVZXBD256,
28635 IX86_BUILTIN_PMOVZXBQ256,
28636 IX86_BUILTIN_PMOVZXWD256,
28637 IX86_BUILTIN_PMOVZXWQ256,
28638 IX86_BUILTIN_PMOVZXDQ256,
28639 IX86_BUILTIN_PMULDQ256,
28640 IX86_BUILTIN_PMULHRSW256,
28641 IX86_BUILTIN_PMULHUW256,
28642 IX86_BUILTIN_PMULHW256,
28643 IX86_BUILTIN_PMULLW256,
28644 IX86_BUILTIN_PMULLD256,
28645 IX86_BUILTIN_PMULUDQ256,
28646 IX86_BUILTIN_POR256,
28647 IX86_BUILTIN_PSADBW256,
28648 IX86_BUILTIN_PSHUFB256,
28649 IX86_BUILTIN_PSHUFD256,
28650 IX86_BUILTIN_PSHUFHW256,
28651 IX86_BUILTIN_PSHUFLW256,
28652 IX86_BUILTIN_PSIGNB256,
28653 IX86_BUILTIN_PSIGNW256,
28654 IX86_BUILTIN_PSIGND256,
28655 IX86_BUILTIN_PSLLDQI256,
28656 IX86_BUILTIN_PSLLWI256,
28657 IX86_BUILTIN_PSLLW256,
28658 IX86_BUILTIN_PSLLDI256,
28659 IX86_BUILTIN_PSLLD256,
28660 IX86_BUILTIN_PSLLQI256,
28661 IX86_BUILTIN_PSLLQ256,
28662 IX86_BUILTIN_PSRAWI256,
28663 IX86_BUILTIN_PSRAW256,
28664 IX86_BUILTIN_PSRADI256,
28665 IX86_BUILTIN_PSRAD256,
28666 IX86_BUILTIN_PSRLDQI256,
28667 IX86_BUILTIN_PSRLWI256,
28668 IX86_BUILTIN_PSRLW256,
28669 IX86_BUILTIN_PSRLDI256,
28670 IX86_BUILTIN_PSRLD256,
28671 IX86_BUILTIN_PSRLQI256,
28672 IX86_BUILTIN_PSRLQ256,
28673 IX86_BUILTIN_PSUBB256,
28674 IX86_BUILTIN_PSUBW256,
28675 IX86_BUILTIN_PSUBD256,
28676 IX86_BUILTIN_PSUBQ256,
28677 IX86_BUILTIN_PSUBSB256,
28678 IX86_BUILTIN_PSUBSW256,
28679 IX86_BUILTIN_PSUBUSB256,
28680 IX86_BUILTIN_PSUBUSW256,
28681 IX86_BUILTIN_PUNPCKHBW256,
28682 IX86_BUILTIN_PUNPCKHWD256,
28683 IX86_BUILTIN_PUNPCKHDQ256,
28684 IX86_BUILTIN_PUNPCKHQDQ256,
28685 IX86_BUILTIN_PUNPCKLBW256,
28686 IX86_BUILTIN_PUNPCKLWD256,
28687 IX86_BUILTIN_PUNPCKLDQ256,
28688 IX86_BUILTIN_PUNPCKLQDQ256,
28689 IX86_BUILTIN_PXOR256,
28690 IX86_BUILTIN_MOVNTDQA256,
28691 IX86_BUILTIN_VBROADCASTSS_PS,
28692 IX86_BUILTIN_VBROADCASTSS_PS256,
28693 IX86_BUILTIN_VBROADCASTSD_PD256,
28694 IX86_BUILTIN_VBROADCASTSI256,
28695 IX86_BUILTIN_PBLENDD256,
28696 IX86_BUILTIN_PBLENDD128,
28697 IX86_BUILTIN_PBROADCASTB256,
28698 IX86_BUILTIN_PBROADCASTW256,
28699 IX86_BUILTIN_PBROADCASTD256,
28700 IX86_BUILTIN_PBROADCASTQ256,
28701 IX86_BUILTIN_PBROADCASTB128,
28702 IX86_BUILTIN_PBROADCASTW128,
28703 IX86_BUILTIN_PBROADCASTD128,
28704 IX86_BUILTIN_PBROADCASTQ128,
28705 IX86_BUILTIN_VPERMVARSI256,
28706 IX86_BUILTIN_VPERMDF256,
28707 IX86_BUILTIN_VPERMVARSF256,
28708 IX86_BUILTIN_VPERMDI256,
28709 IX86_BUILTIN_VPERMTI256,
28710 IX86_BUILTIN_VEXTRACT128I256,
28711 IX86_BUILTIN_VINSERT128I256,
28712 IX86_BUILTIN_MASKLOADD,
28713 IX86_BUILTIN_MASKLOADQ,
28714 IX86_BUILTIN_MASKLOADD256,
28715 IX86_BUILTIN_MASKLOADQ256,
28716 IX86_BUILTIN_MASKSTORED,
28717 IX86_BUILTIN_MASKSTOREQ,
28718 IX86_BUILTIN_MASKSTORED256,
28719 IX86_BUILTIN_MASKSTOREQ256,
28720 IX86_BUILTIN_PSLLVV4DI,
28721 IX86_BUILTIN_PSLLVV2DI,
28722 IX86_BUILTIN_PSLLVV8SI,
28723 IX86_BUILTIN_PSLLVV4SI,
28724 IX86_BUILTIN_PSRAVV8SI,
28725 IX86_BUILTIN_PSRAVV4SI,
28726 IX86_BUILTIN_PSRLVV4DI,
28727 IX86_BUILTIN_PSRLVV2DI,
28728 IX86_BUILTIN_PSRLVV8SI,
28729 IX86_BUILTIN_PSRLVV4SI,
28731 IX86_BUILTIN_GATHERSIV2DF,
28732 IX86_BUILTIN_GATHERSIV4DF,
28733 IX86_BUILTIN_GATHERDIV2DF,
28734 IX86_BUILTIN_GATHERDIV4DF,
28735 IX86_BUILTIN_GATHERSIV4SF,
28736 IX86_BUILTIN_GATHERSIV8SF,
28737 IX86_BUILTIN_GATHERDIV4SF,
28738 IX86_BUILTIN_GATHERDIV8SF,
28739 IX86_BUILTIN_GATHERSIV2DI,
28740 IX86_BUILTIN_GATHERSIV4DI,
28741 IX86_BUILTIN_GATHERDIV2DI,
28742 IX86_BUILTIN_GATHERDIV4DI,
28743 IX86_BUILTIN_GATHERSIV4SI,
28744 IX86_BUILTIN_GATHERSIV8SI,
28745 IX86_BUILTIN_GATHERDIV4SI,
28746 IX86_BUILTIN_GATHERDIV8SI,
28748 /* AVX512F */
28749 IX86_BUILTIN_SI512_SI256,
28750 IX86_BUILTIN_PD512_PD256,
28751 IX86_BUILTIN_PS512_PS256,
28752 IX86_BUILTIN_SI512_SI,
28753 IX86_BUILTIN_PD512_PD,
28754 IX86_BUILTIN_PS512_PS,
28755 IX86_BUILTIN_ADDPD512,
28756 IX86_BUILTIN_ADDPS512,
28757 IX86_BUILTIN_ADDSD_ROUND,
28758 IX86_BUILTIN_ADDSS_ROUND,
28759 IX86_BUILTIN_ALIGND512,
28760 IX86_BUILTIN_ALIGNQ512,
28761 IX86_BUILTIN_BLENDMD512,
28762 IX86_BUILTIN_BLENDMPD512,
28763 IX86_BUILTIN_BLENDMPS512,
28764 IX86_BUILTIN_BLENDMQ512,
28765 IX86_BUILTIN_BROADCASTF32X4_512,
28766 IX86_BUILTIN_BROADCASTF64X4_512,
28767 IX86_BUILTIN_BROADCASTI32X4_512,
28768 IX86_BUILTIN_BROADCASTI64X4_512,
28769 IX86_BUILTIN_BROADCASTSD512,
28770 IX86_BUILTIN_BROADCASTSS512,
28771 IX86_BUILTIN_CMPD512,
28772 IX86_BUILTIN_CMPPD512,
28773 IX86_BUILTIN_CMPPS512,
28774 IX86_BUILTIN_CMPQ512,
28775 IX86_BUILTIN_CMPSD_MASK,
28776 IX86_BUILTIN_CMPSS_MASK,
28777 IX86_BUILTIN_COMIDF,
28778 IX86_BUILTIN_COMISF,
28779 IX86_BUILTIN_COMPRESSPD512,
28780 IX86_BUILTIN_COMPRESSPDSTORE512,
28781 IX86_BUILTIN_COMPRESSPS512,
28782 IX86_BUILTIN_COMPRESSPSSTORE512,
28783 IX86_BUILTIN_CVTDQ2PD512,
28784 IX86_BUILTIN_CVTDQ2PS512,
28785 IX86_BUILTIN_CVTPD2DQ512,
28786 IX86_BUILTIN_CVTPD2PS512,
28787 IX86_BUILTIN_CVTPD2UDQ512,
28788 IX86_BUILTIN_CVTPH2PS512,
28789 IX86_BUILTIN_CVTPS2DQ512,
28790 IX86_BUILTIN_CVTPS2PD512,
28791 IX86_BUILTIN_CVTPS2PH512,
28792 IX86_BUILTIN_CVTPS2UDQ512,
28793 IX86_BUILTIN_CVTSD2SS_ROUND,
28794 IX86_BUILTIN_CVTSI2SD64,
28795 IX86_BUILTIN_CVTSI2SS32,
28796 IX86_BUILTIN_CVTSI2SS64,
28797 IX86_BUILTIN_CVTSS2SD_ROUND,
28798 IX86_BUILTIN_CVTTPD2DQ512,
28799 IX86_BUILTIN_CVTTPD2UDQ512,
28800 IX86_BUILTIN_CVTTPS2DQ512,
28801 IX86_BUILTIN_CVTTPS2UDQ512,
28802 IX86_BUILTIN_CVTUDQ2PD512,
28803 IX86_BUILTIN_CVTUDQ2PS512,
28804 IX86_BUILTIN_CVTUSI2SD32,
28805 IX86_BUILTIN_CVTUSI2SD64,
28806 IX86_BUILTIN_CVTUSI2SS32,
28807 IX86_BUILTIN_CVTUSI2SS64,
28808 IX86_BUILTIN_DIVPD512,
28809 IX86_BUILTIN_DIVPS512,
28810 IX86_BUILTIN_DIVSD_ROUND,
28811 IX86_BUILTIN_DIVSS_ROUND,
28812 IX86_BUILTIN_EXPANDPD512,
28813 IX86_BUILTIN_EXPANDPD512Z,
28814 IX86_BUILTIN_EXPANDPDLOAD512,
28815 IX86_BUILTIN_EXPANDPDLOAD512Z,
28816 IX86_BUILTIN_EXPANDPS512,
28817 IX86_BUILTIN_EXPANDPS512Z,
28818 IX86_BUILTIN_EXPANDPSLOAD512,
28819 IX86_BUILTIN_EXPANDPSLOAD512Z,
28820 IX86_BUILTIN_EXTRACTF32X4,
28821 IX86_BUILTIN_EXTRACTF64X4,
28822 IX86_BUILTIN_EXTRACTI32X4,
28823 IX86_BUILTIN_EXTRACTI64X4,
28824 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28825 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28826 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28827 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28828 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28829 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28830 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28831 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28832 IX86_BUILTIN_GETEXPPD512,
28833 IX86_BUILTIN_GETEXPPS512,
28834 IX86_BUILTIN_GETEXPSD128,
28835 IX86_BUILTIN_GETEXPSS128,
28836 IX86_BUILTIN_GETMANTPD512,
28837 IX86_BUILTIN_GETMANTPS512,
28838 IX86_BUILTIN_GETMANTSD128,
28839 IX86_BUILTIN_GETMANTSS128,
28840 IX86_BUILTIN_INSERTF32X4,
28841 IX86_BUILTIN_INSERTF64X4,
28842 IX86_BUILTIN_INSERTI32X4,
28843 IX86_BUILTIN_INSERTI64X4,
28844 IX86_BUILTIN_LOADAPD512,
28845 IX86_BUILTIN_LOADAPS512,
28846 IX86_BUILTIN_LOADDQUDI512,
28847 IX86_BUILTIN_LOADDQUSI512,
28848 IX86_BUILTIN_LOADUPD512,
28849 IX86_BUILTIN_LOADUPS512,
28850 IX86_BUILTIN_MAXPD512,
28851 IX86_BUILTIN_MAXPS512,
28852 IX86_BUILTIN_MAXSD_ROUND,
28853 IX86_BUILTIN_MAXSS_ROUND,
28854 IX86_BUILTIN_MINPD512,
28855 IX86_BUILTIN_MINPS512,
28856 IX86_BUILTIN_MINSD_ROUND,
28857 IX86_BUILTIN_MINSS_ROUND,
28858 IX86_BUILTIN_MOVAPD512,
28859 IX86_BUILTIN_MOVAPS512,
28860 IX86_BUILTIN_MOVDDUP512,
28861 IX86_BUILTIN_MOVDQA32LOAD512,
28862 IX86_BUILTIN_MOVDQA32STORE512,
28863 IX86_BUILTIN_MOVDQA32_512,
28864 IX86_BUILTIN_MOVDQA64LOAD512,
28865 IX86_BUILTIN_MOVDQA64STORE512,
28866 IX86_BUILTIN_MOVDQA64_512,
28867 IX86_BUILTIN_MOVNTDQ512,
28868 IX86_BUILTIN_MOVNTDQA512,
28869 IX86_BUILTIN_MOVNTPD512,
28870 IX86_BUILTIN_MOVNTPS512,
28871 IX86_BUILTIN_MOVSHDUP512,
28872 IX86_BUILTIN_MOVSLDUP512,
28873 IX86_BUILTIN_MULPD512,
28874 IX86_BUILTIN_MULPS512,
28875 IX86_BUILTIN_MULSD_ROUND,
28876 IX86_BUILTIN_MULSS_ROUND,
28877 IX86_BUILTIN_PABSD512,
28878 IX86_BUILTIN_PABSQ512,
28879 IX86_BUILTIN_PADDD512,
28880 IX86_BUILTIN_PADDQ512,
28881 IX86_BUILTIN_PANDD512,
28882 IX86_BUILTIN_PANDND512,
28883 IX86_BUILTIN_PANDNQ512,
28884 IX86_BUILTIN_PANDQ512,
28885 IX86_BUILTIN_PBROADCASTD512,
28886 IX86_BUILTIN_PBROADCASTD512_GPR,
28887 IX86_BUILTIN_PBROADCASTMB512,
28888 IX86_BUILTIN_PBROADCASTMW512,
28889 IX86_BUILTIN_PBROADCASTQ512,
28890 IX86_BUILTIN_PBROADCASTQ512_GPR,
28891 IX86_BUILTIN_PCMPEQD512_MASK,
28892 IX86_BUILTIN_PCMPEQQ512_MASK,
28893 IX86_BUILTIN_PCMPGTD512_MASK,
28894 IX86_BUILTIN_PCMPGTQ512_MASK,
28895 IX86_BUILTIN_PCOMPRESSD512,
28896 IX86_BUILTIN_PCOMPRESSDSTORE512,
28897 IX86_BUILTIN_PCOMPRESSQ512,
28898 IX86_BUILTIN_PCOMPRESSQSTORE512,
28899 IX86_BUILTIN_PEXPANDD512,
28900 IX86_BUILTIN_PEXPANDD512Z,
28901 IX86_BUILTIN_PEXPANDDLOAD512,
28902 IX86_BUILTIN_PEXPANDDLOAD512Z,
28903 IX86_BUILTIN_PEXPANDQ512,
28904 IX86_BUILTIN_PEXPANDQ512Z,
28905 IX86_BUILTIN_PEXPANDQLOAD512,
28906 IX86_BUILTIN_PEXPANDQLOAD512Z,
28907 IX86_BUILTIN_PMAXSD512,
28908 IX86_BUILTIN_PMAXSQ512,
28909 IX86_BUILTIN_PMAXUD512,
28910 IX86_BUILTIN_PMAXUQ512,
28911 IX86_BUILTIN_PMINSD512,
28912 IX86_BUILTIN_PMINSQ512,
28913 IX86_BUILTIN_PMINUD512,
28914 IX86_BUILTIN_PMINUQ512,
28915 IX86_BUILTIN_PMOVDB512,
28916 IX86_BUILTIN_PMOVDB512_MEM,
28917 IX86_BUILTIN_PMOVDW512,
28918 IX86_BUILTIN_PMOVDW512_MEM,
28919 IX86_BUILTIN_PMOVQB512,
28920 IX86_BUILTIN_PMOVQB512_MEM,
28921 IX86_BUILTIN_PMOVQD512,
28922 IX86_BUILTIN_PMOVQD512_MEM,
28923 IX86_BUILTIN_PMOVQW512,
28924 IX86_BUILTIN_PMOVQW512_MEM,
28925 IX86_BUILTIN_PMOVSDB512,
28926 IX86_BUILTIN_PMOVSDB512_MEM,
28927 IX86_BUILTIN_PMOVSDW512,
28928 IX86_BUILTIN_PMOVSDW512_MEM,
28929 IX86_BUILTIN_PMOVSQB512,
28930 IX86_BUILTIN_PMOVSQB512_MEM,
28931 IX86_BUILTIN_PMOVSQD512,
28932 IX86_BUILTIN_PMOVSQD512_MEM,
28933 IX86_BUILTIN_PMOVSQW512,
28934 IX86_BUILTIN_PMOVSQW512_MEM,
28935 IX86_BUILTIN_PMOVSXBD512,
28936 IX86_BUILTIN_PMOVSXBQ512,
28937 IX86_BUILTIN_PMOVSXDQ512,
28938 IX86_BUILTIN_PMOVSXWD512,
28939 IX86_BUILTIN_PMOVSXWQ512,
28940 IX86_BUILTIN_PMOVUSDB512,
28941 IX86_BUILTIN_PMOVUSDB512_MEM,
28942 IX86_BUILTIN_PMOVUSDW512,
28943 IX86_BUILTIN_PMOVUSDW512_MEM,
28944 IX86_BUILTIN_PMOVUSQB512,
28945 IX86_BUILTIN_PMOVUSQB512_MEM,
28946 IX86_BUILTIN_PMOVUSQD512,
28947 IX86_BUILTIN_PMOVUSQD512_MEM,
28948 IX86_BUILTIN_PMOVUSQW512,
28949 IX86_BUILTIN_PMOVUSQW512_MEM,
28950 IX86_BUILTIN_PMOVZXBD512,
28951 IX86_BUILTIN_PMOVZXBQ512,
28952 IX86_BUILTIN_PMOVZXDQ512,
28953 IX86_BUILTIN_PMOVZXWD512,
28954 IX86_BUILTIN_PMOVZXWQ512,
28955 IX86_BUILTIN_PMULDQ512,
28956 IX86_BUILTIN_PMULLD512,
28957 IX86_BUILTIN_PMULUDQ512,
28958 IX86_BUILTIN_PORD512,
28959 IX86_BUILTIN_PORQ512,
28960 IX86_BUILTIN_PROLD512,
28961 IX86_BUILTIN_PROLQ512,
28962 IX86_BUILTIN_PROLVD512,
28963 IX86_BUILTIN_PROLVQ512,
28964 IX86_BUILTIN_PRORD512,
28965 IX86_BUILTIN_PRORQ512,
28966 IX86_BUILTIN_PRORVD512,
28967 IX86_BUILTIN_PRORVQ512,
28968 IX86_BUILTIN_PSHUFD512,
28969 IX86_BUILTIN_PSLLD512,
28970 IX86_BUILTIN_PSLLDI512,
28971 IX86_BUILTIN_PSLLQ512,
28972 IX86_BUILTIN_PSLLQI512,
28973 IX86_BUILTIN_PSLLVV16SI,
28974 IX86_BUILTIN_PSLLVV8DI,
28975 IX86_BUILTIN_PSRAD512,
28976 IX86_BUILTIN_PSRADI512,
28977 IX86_BUILTIN_PSRAQ512,
28978 IX86_BUILTIN_PSRAQI512,
28979 IX86_BUILTIN_PSRAVV16SI,
28980 IX86_BUILTIN_PSRAVV8DI,
28981 IX86_BUILTIN_PSRLD512,
28982 IX86_BUILTIN_PSRLDI512,
28983 IX86_BUILTIN_PSRLQ512,
28984 IX86_BUILTIN_PSRLQI512,
28985 IX86_BUILTIN_PSRLVV16SI,
28986 IX86_BUILTIN_PSRLVV8DI,
28987 IX86_BUILTIN_PSUBD512,
28988 IX86_BUILTIN_PSUBQ512,
28989 IX86_BUILTIN_PTESTMD512,
28990 IX86_BUILTIN_PTESTMQ512,
28991 IX86_BUILTIN_PTESTNMD512,
28992 IX86_BUILTIN_PTESTNMQ512,
28993 IX86_BUILTIN_PUNPCKHDQ512,
28994 IX86_BUILTIN_PUNPCKHQDQ512,
28995 IX86_BUILTIN_PUNPCKLDQ512,
28996 IX86_BUILTIN_PUNPCKLQDQ512,
28997 IX86_BUILTIN_PXORD512,
28998 IX86_BUILTIN_PXORQ512,
28999 IX86_BUILTIN_RCP14PD512,
29000 IX86_BUILTIN_RCP14PS512,
29001 IX86_BUILTIN_RCP14SD,
29002 IX86_BUILTIN_RCP14SS,
29003 IX86_BUILTIN_RNDSCALEPD,
29004 IX86_BUILTIN_RNDSCALEPS,
29005 IX86_BUILTIN_RNDSCALESD,
29006 IX86_BUILTIN_RNDSCALESS,
29007 IX86_BUILTIN_RSQRT14PD512,
29008 IX86_BUILTIN_RSQRT14PS512,
29009 IX86_BUILTIN_RSQRT14SD,
29010 IX86_BUILTIN_RSQRT14SS,
29011 IX86_BUILTIN_SCALEFPD512,
29012 IX86_BUILTIN_SCALEFPS512,
29013 IX86_BUILTIN_SCALEFSD,
29014 IX86_BUILTIN_SCALEFSS,
29015 IX86_BUILTIN_SHUFPD512,
29016 IX86_BUILTIN_SHUFPS512,
29017 IX86_BUILTIN_SHUF_F32x4,
29018 IX86_BUILTIN_SHUF_F64x2,
29019 IX86_BUILTIN_SHUF_I32x4,
29020 IX86_BUILTIN_SHUF_I64x2,
29021 IX86_BUILTIN_SQRTPD512,
29022 IX86_BUILTIN_SQRTPD512_MASK,
29023 IX86_BUILTIN_SQRTPS512_MASK,
29024 IX86_BUILTIN_SQRTPS_NR512,
29025 IX86_BUILTIN_SQRTSD_ROUND,
29026 IX86_BUILTIN_SQRTSS_ROUND,
29027 IX86_BUILTIN_STOREAPD512,
29028 IX86_BUILTIN_STOREAPS512,
29029 IX86_BUILTIN_STOREDQUDI512,
29030 IX86_BUILTIN_STOREDQUSI512,
29031 IX86_BUILTIN_STOREUPD512,
29032 IX86_BUILTIN_STOREUPS512,
29033 IX86_BUILTIN_SUBPD512,
29034 IX86_BUILTIN_SUBPS512,
29035 IX86_BUILTIN_SUBSD_ROUND,
29036 IX86_BUILTIN_SUBSS_ROUND,
29037 IX86_BUILTIN_UCMPD512,
29038 IX86_BUILTIN_UCMPQ512,
29039 IX86_BUILTIN_UNPCKHPD512,
29040 IX86_BUILTIN_UNPCKHPS512,
29041 IX86_BUILTIN_UNPCKLPD512,
29042 IX86_BUILTIN_UNPCKLPS512,
29043 IX86_BUILTIN_VCVTSD2SI32,
29044 IX86_BUILTIN_VCVTSD2SI64,
29045 IX86_BUILTIN_VCVTSD2USI32,
29046 IX86_BUILTIN_VCVTSD2USI64,
29047 IX86_BUILTIN_VCVTSS2SI32,
29048 IX86_BUILTIN_VCVTSS2SI64,
29049 IX86_BUILTIN_VCVTSS2USI32,
29050 IX86_BUILTIN_VCVTSS2USI64,
29051 IX86_BUILTIN_VCVTTSD2SI32,
29052 IX86_BUILTIN_VCVTTSD2SI64,
29053 IX86_BUILTIN_VCVTTSD2USI32,
29054 IX86_BUILTIN_VCVTTSD2USI64,
29055 IX86_BUILTIN_VCVTTSS2SI32,
29056 IX86_BUILTIN_VCVTTSS2SI64,
29057 IX86_BUILTIN_VCVTTSS2USI32,
29058 IX86_BUILTIN_VCVTTSS2USI64,
29059 IX86_BUILTIN_VFMADDPD512_MASK,
29060 IX86_BUILTIN_VFMADDPD512_MASK3,
29061 IX86_BUILTIN_VFMADDPD512_MASKZ,
29062 IX86_BUILTIN_VFMADDPS512_MASK,
29063 IX86_BUILTIN_VFMADDPS512_MASK3,
29064 IX86_BUILTIN_VFMADDPS512_MASKZ,
29065 IX86_BUILTIN_VFMADDSD3_ROUND,
29066 IX86_BUILTIN_VFMADDSS3_ROUND,
29067 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29068 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29069 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29070 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29071 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29072 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29073 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29074 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29075 IX86_BUILTIN_VFMSUBPD512_MASK3,
29076 IX86_BUILTIN_VFMSUBPS512_MASK3,
29077 IX86_BUILTIN_VFMSUBSD3_MASK3,
29078 IX86_BUILTIN_VFMSUBSS3_MASK3,
29079 IX86_BUILTIN_VFNMADDPD512_MASK,
29080 IX86_BUILTIN_VFNMADDPS512_MASK,
29081 IX86_BUILTIN_VFNMSUBPD512_MASK,
29082 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29083 IX86_BUILTIN_VFNMSUBPS512_MASK,
29084 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29085 IX86_BUILTIN_VPCLZCNTD512,
29086 IX86_BUILTIN_VPCLZCNTQ512,
29087 IX86_BUILTIN_VPCONFLICTD512,
29088 IX86_BUILTIN_VPCONFLICTQ512,
29089 IX86_BUILTIN_VPERMDF512,
29090 IX86_BUILTIN_VPERMDI512,
29091 IX86_BUILTIN_VPERMI2VARD512,
29092 IX86_BUILTIN_VPERMI2VARPD512,
29093 IX86_BUILTIN_VPERMI2VARPS512,
29094 IX86_BUILTIN_VPERMI2VARQ512,
29095 IX86_BUILTIN_VPERMILPD512,
29096 IX86_BUILTIN_VPERMILPS512,
29097 IX86_BUILTIN_VPERMILVARPD512,
29098 IX86_BUILTIN_VPERMILVARPS512,
29099 IX86_BUILTIN_VPERMT2VARD512,
29100 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29101 IX86_BUILTIN_VPERMT2VARPD512,
29102 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29103 IX86_BUILTIN_VPERMT2VARPS512,
29104 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29105 IX86_BUILTIN_VPERMT2VARQ512,
29106 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29107 IX86_BUILTIN_VPERMVARDF512,
29108 IX86_BUILTIN_VPERMVARDI512,
29109 IX86_BUILTIN_VPERMVARSF512,
29110 IX86_BUILTIN_VPERMVARSI512,
29111 IX86_BUILTIN_VTERNLOGD512_MASK,
29112 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29113 IX86_BUILTIN_VTERNLOGQ512_MASK,
29114 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29116 /* Mask arithmetic operations */
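      /* These operate on the 16-bit opmask (k) registers introduced with
         AVX512F; e.g. IX86_BUILTIN_KAND16 is the bitwise AND of two 16-bit
         masks.  */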
29117 IX86_BUILTIN_KAND16,
29118 IX86_BUILTIN_KANDN16,
29119 IX86_BUILTIN_KNOT16,
29120 IX86_BUILTIN_KOR16,
29121 IX86_BUILTIN_KORTESTC16,
29122 IX86_BUILTIN_KORTESTZ16,
29123 IX86_BUILTIN_KUNPCKBW,
29124 IX86_BUILTIN_KXNOR16,
29125 IX86_BUILTIN_KXOR16,
29126 IX86_BUILTIN_KMOV16,
29128 /* AVX512VL. */
29129 IX86_BUILTIN_PMOVUSQD256_MEM,
29130 IX86_BUILTIN_PMOVUSQD128_MEM,
29131 IX86_BUILTIN_PMOVSQD256_MEM,
29132 IX86_BUILTIN_PMOVSQD128_MEM,
29133 IX86_BUILTIN_PMOVQD256_MEM,
29134 IX86_BUILTIN_PMOVQD128_MEM,
29135 IX86_BUILTIN_PMOVUSQW256_MEM,
29136 IX86_BUILTIN_PMOVUSQW128_MEM,
29137 IX86_BUILTIN_PMOVSQW256_MEM,
29138 IX86_BUILTIN_PMOVSQW128_MEM,
29139 IX86_BUILTIN_PMOVQW256_MEM,
29140 IX86_BUILTIN_PMOVQW128_MEM,
29141 IX86_BUILTIN_PMOVUSQB256_MEM,
29142 IX86_BUILTIN_PMOVUSQB128_MEM,
29143 IX86_BUILTIN_PMOVSQB256_MEM,
29144 IX86_BUILTIN_PMOVSQB128_MEM,
29145 IX86_BUILTIN_PMOVQB256_MEM,
29146 IX86_BUILTIN_PMOVQB128_MEM,
29147 IX86_BUILTIN_PMOVUSDW256_MEM,
29148 IX86_BUILTIN_PMOVUSDW128_MEM,
29149 IX86_BUILTIN_PMOVSDW256_MEM,
29150 IX86_BUILTIN_PMOVSDW128_MEM,
29151 IX86_BUILTIN_PMOVDW256_MEM,
29152 IX86_BUILTIN_PMOVDW128_MEM,
29153 IX86_BUILTIN_PMOVUSDB256_MEM,
29154 IX86_BUILTIN_PMOVUSDB128_MEM,
29155 IX86_BUILTIN_PMOVSDB256_MEM,
29156 IX86_BUILTIN_PMOVSDB128_MEM,
29157 IX86_BUILTIN_PMOVDB256_MEM,
29158 IX86_BUILTIN_PMOVDB128_MEM,
29159 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29160 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29161 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29162 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29163 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29164 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29165 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29166 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29167 IX86_BUILTIN_LOADAPD256_MASK,
29168 IX86_BUILTIN_LOADAPD128_MASK,
29169 IX86_BUILTIN_LOADAPS256_MASK,
29170 IX86_BUILTIN_LOADAPS128_MASK,
29171 IX86_BUILTIN_STOREAPD256_MASK,
29172 IX86_BUILTIN_STOREAPD128_MASK,
29173 IX86_BUILTIN_STOREAPS256_MASK,
29174 IX86_BUILTIN_STOREAPS128_MASK,
29175 IX86_BUILTIN_LOADUPD256_MASK,
29176 IX86_BUILTIN_LOADUPD128_MASK,
29177 IX86_BUILTIN_LOADUPS256_MASK,
29178 IX86_BUILTIN_LOADUPS128_MASK,
29179 IX86_BUILTIN_STOREUPD256_MASK,
29180 IX86_BUILTIN_STOREUPD128_MASK,
29181 IX86_BUILTIN_STOREUPS256_MASK,
29182 IX86_BUILTIN_STOREUPS128_MASK,
29183 IX86_BUILTIN_LOADDQUDI256_MASK,
29184 IX86_BUILTIN_LOADDQUDI128_MASK,
29185 IX86_BUILTIN_LOADDQUSI256_MASK,
29186 IX86_BUILTIN_LOADDQUSI128_MASK,
29187 IX86_BUILTIN_LOADDQUHI256_MASK,
29188 IX86_BUILTIN_LOADDQUHI128_MASK,
29189 IX86_BUILTIN_LOADDQUQI256_MASK,
29190 IX86_BUILTIN_LOADDQUQI128_MASK,
29191 IX86_BUILTIN_STOREDQUDI256_MASK,
29192 IX86_BUILTIN_STOREDQUDI128_MASK,
29193 IX86_BUILTIN_STOREDQUSI256_MASK,
29194 IX86_BUILTIN_STOREDQUSI128_MASK,
29195 IX86_BUILTIN_STOREDQUHI256_MASK,
29196 IX86_BUILTIN_STOREDQUHI128_MASK,
29197 IX86_BUILTIN_STOREDQUQI256_MASK,
29198 IX86_BUILTIN_STOREDQUQI128_MASK,
29199 IX86_BUILTIN_COMPRESSPDSTORE256,
29200 IX86_BUILTIN_COMPRESSPDSTORE128,
29201 IX86_BUILTIN_COMPRESSPSSTORE256,
29202 IX86_BUILTIN_COMPRESSPSSTORE128,
29203 IX86_BUILTIN_PCOMPRESSQSTORE256,
29204 IX86_BUILTIN_PCOMPRESSQSTORE128,
29205 IX86_BUILTIN_PCOMPRESSDSTORE256,
29206 IX86_BUILTIN_PCOMPRESSDSTORE128,
29207 IX86_BUILTIN_EXPANDPDLOAD256,
29208 IX86_BUILTIN_EXPANDPDLOAD128,
29209 IX86_BUILTIN_EXPANDPSLOAD256,
29210 IX86_BUILTIN_EXPANDPSLOAD128,
29211 IX86_BUILTIN_PEXPANDQLOAD256,
29212 IX86_BUILTIN_PEXPANDQLOAD128,
29213 IX86_BUILTIN_PEXPANDDLOAD256,
29214 IX86_BUILTIN_PEXPANDDLOAD128,
29215 IX86_BUILTIN_EXPANDPDLOAD256Z,
29216 IX86_BUILTIN_EXPANDPDLOAD128Z,
29217 IX86_BUILTIN_EXPANDPSLOAD256Z,
29218 IX86_BUILTIN_EXPANDPSLOAD128Z,
29219 IX86_BUILTIN_PEXPANDQLOAD256Z,
29220 IX86_BUILTIN_PEXPANDQLOAD128Z,
29221 IX86_BUILTIN_PEXPANDDLOAD256Z,
29222 IX86_BUILTIN_PEXPANDDLOAD128Z,
29223 IX86_BUILTIN_PALIGNR256_MASK,
29224 IX86_BUILTIN_PALIGNR128_MASK,
29225 IX86_BUILTIN_MOVDQA64_256_MASK,
29226 IX86_BUILTIN_MOVDQA64_128_MASK,
29227 IX86_BUILTIN_MOVDQA32_256_MASK,
29228 IX86_BUILTIN_MOVDQA32_128_MASK,
29229 IX86_BUILTIN_MOVAPD256_MASK,
29230 IX86_BUILTIN_MOVAPD128_MASK,
29231 IX86_BUILTIN_MOVAPS256_MASK,
29232 IX86_BUILTIN_MOVAPS128_MASK,
29233 IX86_BUILTIN_MOVDQUHI256_MASK,
29234 IX86_BUILTIN_MOVDQUHI128_MASK,
29235 IX86_BUILTIN_MOVDQUQI256_MASK,
29236 IX86_BUILTIN_MOVDQUQI128_MASK,
29237 IX86_BUILTIN_MINPS128_MASK,
29238 IX86_BUILTIN_MAXPS128_MASK,
29239 IX86_BUILTIN_MINPD128_MASK,
29240 IX86_BUILTIN_MAXPD128_MASK,
29241 IX86_BUILTIN_MAXPD256_MASK,
29242 IX86_BUILTIN_MAXPS256_MASK,
29243 IX86_BUILTIN_MINPD256_MASK,
29244 IX86_BUILTIN_MINPS256_MASK,
29245 IX86_BUILTIN_MULPS128_MASK,
29246 IX86_BUILTIN_DIVPS128_MASK,
29247 IX86_BUILTIN_MULPD128_MASK,
29248 IX86_BUILTIN_DIVPD128_MASK,
29249 IX86_BUILTIN_DIVPD256_MASK,
29250 IX86_BUILTIN_DIVPS256_MASK,
29251 IX86_BUILTIN_MULPD256_MASK,
29252 IX86_BUILTIN_MULPS256_MASK,
29253 IX86_BUILTIN_ADDPD128_MASK,
29254 IX86_BUILTIN_ADDPD256_MASK,
29255 IX86_BUILTIN_ADDPS128_MASK,
29256 IX86_BUILTIN_ADDPS256_MASK,
29257 IX86_BUILTIN_SUBPD128_MASK,
29258 IX86_BUILTIN_SUBPD256_MASK,
29259 IX86_BUILTIN_SUBPS128_MASK,
29260 IX86_BUILTIN_SUBPS256_MASK,
29261 IX86_BUILTIN_XORPD256_MASK,
29262 IX86_BUILTIN_XORPD128_MASK,
29263 IX86_BUILTIN_XORPS256_MASK,
29264 IX86_BUILTIN_XORPS128_MASK,
29265 IX86_BUILTIN_ORPD256_MASK,
29266 IX86_BUILTIN_ORPD128_MASK,
29267 IX86_BUILTIN_ORPS256_MASK,
29268 IX86_BUILTIN_ORPS128_MASK,
29269 IX86_BUILTIN_BROADCASTF32x2_256,
29270 IX86_BUILTIN_BROADCASTI32x2_256,
29271 IX86_BUILTIN_BROADCASTI32x2_128,
29272 IX86_BUILTIN_BROADCASTF64X2_256,
29273 IX86_BUILTIN_BROADCASTI64X2_256,
29274 IX86_BUILTIN_BROADCASTF32X4_256,
29275 IX86_BUILTIN_BROADCASTI32X4_256,
29276 IX86_BUILTIN_EXTRACTF32X4_256,
29277 IX86_BUILTIN_EXTRACTI32X4_256,
29278 IX86_BUILTIN_DBPSADBW256,
29279 IX86_BUILTIN_DBPSADBW128,
29280 IX86_BUILTIN_CVTTPD2QQ256,
29281 IX86_BUILTIN_CVTTPD2QQ128,
29282 IX86_BUILTIN_CVTTPD2UQQ256,
29283 IX86_BUILTIN_CVTTPD2UQQ128,
29284 IX86_BUILTIN_CVTPD2QQ256,
29285 IX86_BUILTIN_CVTPD2QQ128,
29286 IX86_BUILTIN_CVTPD2UQQ256,
29287 IX86_BUILTIN_CVTPD2UQQ128,
29288 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29289 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29290 IX86_BUILTIN_CVTTPS2QQ256,
29291 IX86_BUILTIN_CVTTPS2QQ128,
29292 IX86_BUILTIN_CVTTPS2UQQ256,
29293 IX86_BUILTIN_CVTTPS2UQQ128,
29294 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29295 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29296 IX86_BUILTIN_CVTTPS2UDQ256,
29297 IX86_BUILTIN_CVTTPS2UDQ128,
29298 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29299 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29300 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29301 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29302 IX86_BUILTIN_CVTPD2DQ256_MASK,
29303 IX86_BUILTIN_CVTPD2DQ128_MASK,
29304 IX86_BUILTIN_CVTDQ2PD256_MASK,
29305 IX86_BUILTIN_CVTDQ2PD128_MASK,
29306 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29307 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29308 IX86_BUILTIN_CVTDQ2PS256_MASK,
29309 IX86_BUILTIN_CVTDQ2PS128_MASK,
29310 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29311 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29312 IX86_BUILTIN_CVTPS2PD256_MASK,
29313 IX86_BUILTIN_CVTPS2PD128_MASK,
29314 IX86_BUILTIN_PBROADCASTB256_MASK,
29315 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29316 IX86_BUILTIN_PBROADCASTB128_MASK,
29317 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29318 IX86_BUILTIN_PBROADCASTW256_MASK,
29319 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29320 IX86_BUILTIN_PBROADCASTW128_MASK,
29321 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29322 IX86_BUILTIN_PBROADCASTD256_MASK,
29323 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29324 IX86_BUILTIN_PBROADCASTD128_MASK,
29325 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29326 IX86_BUILTIN_PBROADCASTQ256_MASK,
29327 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29328 IX86_BUILTIN_PBROADCASTQ128_MASK,
29329 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29330 IX86_BUILTIN_BROADCASTSS256,
29331 IX86_BUILTIN_BROADCASTSS128,
29332 IX86_BUILTIN_BROADCASTSD256,
29333 IX86_BUILTIN_EXTRACTF64X2_256,
29334 IX86_BUILTIN_EXTRACTI64X2_256,
29335 IX86_BUILTIN_INSERTF32X4_256,
29336 IX86_BUILTIN_INSERTI32X4_256,
29337 IX86_BUILTIN_PMOVSXBW256_MASK,
29338 IX86_BUILTIN_PMOVSXBW128_MASK,
29339 IX86_BUILTIN_PMOVSXBD256_MASK,
29340 IX86_BUILTIN_PMOVSXBD128_MASK,
29341 IX86_BUILTIN_PMOVSXBQ256_MASK,
29342 IX86_BUILTIN_PMOVSXBQ128_MASK,
29343 IX86_BUILTIN_PMOVSXWD256_MASK,
29344 IX86_BUILTIN_PMOVSXWD128_MASK,
29345 IX86_BUILTIN_PMOVSXWQ256_MASK,
29346 IX86_BUILTIN_PMOVSXWQ128_MASK,
29347 IX86_BUILTIN_PMOVSXDQ256_MASK,
29348 IX86_BUILTIN_PMOVSXDQ128_MASK,
29349 IX86_BUILTIN_PMOVZXBW256_MASK,
29350 IX86_BUILTIN_PMOVZXBW128_MASK,
29351 IX86_BUILTIN_PMOVZXBD256_MASK,
29352 IX86_BUILTIN_PMOVZXBD128_MASK,
29353 IX86_BUILTIN_PMOVZXBQ256_MASK,
29354 IX86_BUILTIN_PMOVZXBQ128_MASK,
29355 IX86_BUILTIN_PMOVZXWD256_MASK,
29356 IX86_BUILTIN_PMOVZXWD128_MASK,
29357 IX86_BUILTIN_PMOVZXWQ256_MASK,
29358 IX86_BUILTIN_PMOVZXWQ128_MASK,
29359 IX86_BUILTIN_PMOVZXDQ256_MASK,
29360 IX86_BUILTIN_PMOVZXDQ128_MASK,
29361 IX86_BUILTIN_REDUCEPD256_MASK,
29362 IX86_BUILTIN_REDUCEPD128_MASK,
29363 IX86_BUILTIN_REDUCEPS256_MASK,
29364 IX86_BUILTIN_REDUCEPS128_MASK,
29365 IX86_BUILTIN_REDUCESD_MASK,
29366 IX86_BUILTIN_REDUCESS_MASK,
29367 IX86_BUILTIN_VPERMVARHI256_MASK,
29368 IX86_BUILTIN_VPERMVARHI128_MASK,
29369 IX86_BUILTIN_VPERMT2VARHI256,
29370 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29371 IX86_BUILTIN_VPERMT2VARHI128,
29372 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29373 IX86_BUILTIN_VPERMI2VARHI256,
29374 IX86_BUILTIN_VPERMI2VARHI128,
29375 IX86_BUILTIN_RCP14PD256,
29376 IX86_BUILTIN_RCP14PD128,
29377 IX86_BUILTIN_RCP14PS256,
29378 IX86_BUILTIN_RCP14PS128,
29379 IX86_BUILTIN_RSQRT14PD256_MASK,
29380 IX86_BUILTIN_RSQRT14PD128_MASK,
29381 IX86_BUILTIN_RSQRT14PS256_MASK,
29382 IX86_BUILTIN_RSQRT14PS128_MASK,
29383 IX86_BUILTIN_SQRTPD256_MASK,
29384 IX86_BUILTIN_SQRTPD128_MASK,
29385 IX86_BUILTIN_SQRTPS256_MASK,
29386 IX86_BUILTIN_SQRTPS128_MASK,
29387 IX86_BUILTIN_PADDB128_MASK,
29388 IX86_BUILTIN_PADDW128_MASK,
29389 IX86_BUILTIN_PADDD128_MASK,
29390 IX86_BUILTIN_PADDQ128_MASK,
29391 IX86_BUILTIN_PSUBB128_MASK,
29392 IX86_BUILTIN_PSUBW128_MASK,
29393 IX86_BUILTIN_PSUBD128_MASK,
29394 IX86_BUILTIN_PSUBQ128_MASK,
29395 IX86_BUILTIN_PADDSB128_MASK,
29396 IX86_BUILTIN_PADDSW128_MASK,
29397 IX86_BUILTIN_PSUBSB128_MASK,
29398 IX86_BUILTIN_PSUBSW128_MASK,
29399 IX86_BUILTIN_PADDUSB128_MASK,
29400 IX86_BUILTIN_PADDUSW128_MASK,
29401 IX86_BUILTIN_PSUBUSB128_MASK,
29402 IX86_BUILTIN_PSUBUSW128_MASK,
29403 IX86_BUILTIN_PADDB256_MASK,
29404 IX86_BUILTIN_PADDW256_MASK,
29405 IX86_BUILTIN_PADDD256_MASK,
29406 IX86_BUILTIN_PADDQ256_MASK,
29407 IX86_BUILTIN_PADDSB256_MASK,
29408 IX86_BUILTIN_PADDSW256_MASK,
29409 IX86_BUILTIN_PADDUSB256_MASK,
29410 IX86_BUILTIN_PADDUSW256_MASK,
29411 IX86_BUILTIN_PSUBB256_MASK,
29412 IX86_BUILTIN_PSUBW256_MASK,
29413 IX86_BUILTIN_PSUBD256_MASK,
29414 IX86_BUILTIN_PSUBQ256_MASK,
29415 IX86_BUILTIN_PSUBSB256_MASK,
29416 IX86_BUILTIN_PSUBSW256_MASK,
29417 IX86_BUILTIN_PSUBUSB256_MASK,
29418 IX86_BUILTIN_PSUBUSW256_MASK,
29419 IX86_BUILTIN_SHUF_F64x2_256,
29420 IX86_BUILTIN_SHUF_I64x2_256,
29421 IX86_BUILTIN_SHUF_I32x4_256,
29422 IX86_BUILTIN_SHUF_F32x4_256,
29423 IX86_BUILTIN_PMOVWB128,
29424 IX86_BUILTIN_PMOVWB256,
29425 IX86_BUILTIN_PMOVSWB128,
29426 IX86_BUILTIN_PMOVSWB256,
29427 IX86_BUILTIN_PMOVUSWB128,
29428 IX86_BUILTIN_PMOVUSWB256,
29429 IX86_BUILTIN_PMOVDB128,
29430 IX86_BUILTIN_PMOVDB256,
29431 IX86_BUILTIN_PMOVSDB128,
29432 IX86_BUILTIN_PMOVSDB256,
29433 IX86_BUILTIN_PMOVUSDB128,
29434 IX86_BUILTIN_PMOVUSDB256,
29435 IX86_BUILTIN_PMOVDW128,
29436 IX86_BUILTIN_PMOVDW256,
29437 IX86_BUILTIN_PMOVSDW128,
29438 IX86_BUILTIN_PMOVSDW256,
29439 IX86_BUILTIN_PMOVUSDW128,
29440 IX86_BUILTIN_PMOVUSDW256,
29441 IX86_BUILTIN_PMOVQB128,
29442 IX86_BUILTIN_PMOVQB256,
29443 IX86_BUILTIN_PMOVSQB128,
29444 IX86_BUILTIN_PMOVSQB256,
29445 IX86_BUILTIN_PMOVUSQB128,
29446 IX86_BUILTIN_PMOVUSQB256,
29447 IX86_BUILTIN_PMOVQW128,
29448 IX86_BUILTIN_PMOVQW256,
29449 IX86_BUILTIN_PMOVSQW128,
29450 IX86_BUILTIN_PMOVSQW256,
29451 IX86_BUILTIN_PMOVUSQW128,
29452 IX86_BUILTIN_PMOVUSQW256,
29453 IX86_BUILTIN_PMOVQD128,
29454 IX86_BUILTIN_PMOVQD256,
29455 IX86_BUILTIN_PMOVSQD128,
29456 IX86_BUILTIN_PMOVSQD256,
29457 IX86_BUILTIN_PMOVUSQD128,
29458 IX86_BUILTIN_PMOVUSQD256,
29459 IX86_BUILTIN_RANGEPD256,
29460 IX86_BUILTIN_RANGEPD128,
29461 IX86_BUILTIN_RANGEPS256,
29462 IX86_BUILTIN_RANGEPS128,
29463 IX86_BUILTIN_GETEXPPS256,
29464 IX86_BUILTIN_GETEXPPD256,
29465 IX86_BUILTIN_GETEXPPS128,
29466 IX86_BUILTIN_GETEXPPD128,
29467 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29468 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29469 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29470 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29471 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29472 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29473 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29474 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29475 IX86_BUILTIN_PABSQ256,
29476 IX86_BUILTIN_PABSQ128,
29477 IX86_BUILTIN_PABSD256_MASK,
29478 IX86_BUILTIN_PABSD128_MASK,
29479 IX86_BUILTIN_PMULHRSW256_MASK,
29480 IX86_BUILTIN_PMULHRSW128_MASK,
29481 IX86_BUILTIN_PMULHUW128_MASK,
29482 IX86_BUILTIN_PMULHUW256_MASK,
29483 IX86_BUILTIN_PMULHW256_MASK,
29484 IX86_BUILTIN_PMULHW128_MASK,
29485 IX86_BUILTIN_PMULLW256_MASK,
29486 IX86_BUILTIN_PMULLW128_MASK,
29487 IX86_BUILTIN_PMULLQ256,
29488 IX86_BUILTIN_PMULLQ128,
29489 IX86_BUILTIN_ANDPD256_MASK,
29490 IX86_BUILTIN_ANDPD128_MASK,
29491 IX86_BUILTIN_ANDPS256_MASK,
29492 IX86_BUILTIN_ANDPS128_MASK,
29493 IX86_BUILTIN_ANDNPD256_MASK,
29494 IX86_BUILTIN_ANDNPD128_MASK,
29495 IX86_BUILTIN_ANDNPS256_MASK,
29496 IX86_BUILTIN_ANDNPS128_MASK,
29497 IX86_BUILTIN_PSLLWI128_MASK,
29498 IX86_BUILTIN_PSLLDI128_MASK,
29499 IX86_BUILTIN_PSLLQI128_MASK,
29500 IX86_BUILTIN_PSLLW128_MASK,
29501 IX86_BUILTIN_PSLLD128_MASK,
29502 IX86_BUILTIN_PSLLQ128_MASK,
29503       IX86_BUILTIN_PSLLWI256_MASK,
29504 IX86_BUILTIN_PSLLW256_MASK,
29505 IX86_BUILTIN_PSLLDI256_MASK,
29506 IX86_BUILTIN_PSLLD256_MASK,
29507 IX86_BUILTIN_PSLLQI256_MASK,
29508 IX86_BUILTIN_PSLLQ256_MASK,
29509 IX86_BUILTIN_PSRADI128_MASK,
29510 IX86_BUILTIN_PSRAD128_MASK,
29511 IX86_BUILTIN_PSRADI256_MASK,
29512 IX86_BUILTIN_PSRAD256_MASK,
29513 IX86_BUILTIN_PSRAQI128_MASK,
29514 IX86_BUILTIN_PSRAQ128_MASK,
29515 IX86_BUILTIN_PSRAQI256_MASK,
29516 IX86_BUILTIN_PSRAQ256_MASK,
29517 IX86_BUILTIN_PANDD256,
29518 IX86_BUILTIN_PANDD128,
29519 IX86_BUILTIN_PSRLDI128_MASK,
29520 IX86_BUILTIN_PSRLD128_MASK,
29521 IX86_BUILTIN_PSRLDI256_MASK,
29522 IX86_BUILTIN_PSRLD256_MASK,
29523 IX86_BUILTIN_PSRLQI128_MASK,
29524 IX86_BUILTIN_PSRLQ128_MASK,
29525 IX86_BUILTIN_PSRLQI256_MASK,
29526 IX86_BUILTIN_PSRLQ256_MASK,
29527 IX86_BUILTIN_PANDQ256,
29528 IX86_BUILTIN_PANDQ128,
29529 IX86_BUILTIN_PANDND256,
29530 IX86_BUILTIN_PANDND128,
29531 IX86_BUILTIN_PANDNQ256,
29532 IX86_BUILTIN_PANDNQ128,
29533 IX86_BUILTIN_PORD256,
29534 IX86_BUILTIN_PORD128,
29535 IX86_BUILTIN_PORQ256,
29536 IX86_BUILTIN_PORQ128,
29537 IX86_BUILTIN_PXORD256,
29538 IX86_BUILTIN_PXORD128,
29539 IX86_BUILTIN_PXORQ256,
29540 IX86_BUILTIN_PXORQ128,
29541 IX86_BUILTIN_PACKSSWB256_MASK,
29542 IX86_BUILTIN_PACKSSWB128_MASK,
29543 IX86_BUILTIN_PACKUSWB256_MASK,
29544 IX86_BUILTIN_PACKUSWB128_MASK,
29545 IX86_BUILTIN_RNDSCALEPS256,
29546 IX86_BUILTIN_RNDSCALEPD256,
29547 IX86_BUILTIN_RNDSCALEPS128,
29548 IX86_BUILTIN_RNDSCALEPD128,
29549 IX86_BUILTIN_VTERNLOGQ256_MASK,
29550 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29551 IX86_BUILTIN_VTERNLOGD256_MASK,
29552 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29553 IX86_BUILTIN_VTERNLOGQ128_MASK,
29554 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29555 IX86_BUILTIN_VTERNLOGD128_MASK,
29556 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29557 IX86_BUILTIN_SCALEFPD256,
29558 IX86_BUILTIN_SCALEFPS256,
29559 IX86_BUILTIN_SCALEFPD128,
29560 IX86_BUILTIN_SCALEFPS128,
29561 IX86_BUILTIN_VFMADDPD256_MASK,
29562 IX86_BUILTIN_VFMADDPD256_MASK3,
29563 IX86_BUILTIN_VFMADDPD256_MASKZ,
29564 IX86_BUILTIN_VFMADDPD128_MASK,
29565 IX86_BUILTIN_VFMADDPD128_MASK3,
29566 IX86_BUILTIN_VFMADDPD128_MASKZ,
29567 IX86_BUILTIN_VFMADDPS256_MASK,
29568 IX86_BUILTIN_VFMADDPS256_MASK3,
29569 IX86_BUILTIN_VFMADDPS256_MASKZ,
29570 IX86_BUILTIN_VFMADDPS128_MASK,
29571 IX86_BUILTIN_VFMADDPS128_MASK3,
29572 IX86_BUILTIN_VFMADDPS128_MASKZ,
29573 IX86_BUILTIN_VFMSUBPD256_MASK3,
29574 IX86_BUILTIN_VFMSUBPD128_MASK3,
29575 IX86_BUILTIN_VFMSUBPS256_MASK3,
29576 IX86_BUILTIN_VFMSUBPS128_MASK3,
29577 IX86_BUILTIN_VFNMADDPD256_MASK,
29578 IX86_BUILTIN_VFNMADDPD128_MASK,
29579 IX86_BUILTIN_VFNMADDPS256_MASK,
29580 IX86_BUILTIN_VFNMADDPS128_MASK,
29581 IX86_BUILTIN_VFNMSUBPD256_MASK,
29582 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29583 IX86_BUILTIN_VFNMSUBPD128_MASK,
29584 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29585 IX86_BUILTIN_VFNMSUBPS256_MASK,
29586 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29587 IX86_BUILTIN_VFNMSUBPS128_MASK,
29588 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29589 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29590 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29591 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29592 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29593 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29594 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29595 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29596 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29597 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29598 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29599 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29600 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29601 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29602 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29603 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29604 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29605 IX86_BUILTIN_INSERTF64X2_256,
29606 IX86_BUILTIN_INSERTI64X2_256,
29607 IX86_BUILTIN_PSRAVV16HI,
29608 IX86_BUILTIN_PSRAVV8HI,
29609 IX86_BUILTIN_PMADDUBSW256_MASK,
29610 IX86_BUILTIN_PMADDUBSW128_MASK,
29611 IX86_BUILTIN_PMADDWD256_MASK,
29612 IX86_BUILTIN_PMADDWD128_MASK,
29613 IX86_BUILTIN_PSRLVV16HI,
29614 IX86_BUILTIN_PSRLVV8HI,
29615 IX86_BUILTIN_CVTPS2DQ256_MASK,
29616 IX86_BUILTIN_CVTPS2DQ128_MASK,
29617 IX86_BUILTIN_CVTPS2UDQ256,
29618 IX86_BUILTIN_CVTPS2UDQ128,
29619 IX86_BUILTIN_CVTPS2QQ256,
29620 IX86_BUILTIN_CVTPS2QQ128,
29621 IX86_BUILTIN_CVTPS2UQQ256,
29622 IX86_BUILTIN_CVTPS2UQQ128,
29623 IX86_BUILTIN_GETMANTPS256,
29624 IX86_BUILTIN_GETMANTPS128,
29625 IX86_BUILTIN_GETMANTPD256,
29626 IX86_BUILTIN_GETMANTPD128,
29627 IX86_BUILTIN_MOVDDUP256_MASK,
29628 IX86_BUILTIN_MOVDDUP128_MASK,
29629 IX86_BUILTIN_MOVSHDUP256_MASK,
29630 IX86_BUILTIN_MOVSHDUP128_MASK,
29631 IX86_BUILTIN_MOVSLDUP256_MASK,
29632 IX86_BUILTIN_MOVSLDUP128_MASK,
29633 IX86_BUILTIN_CVTQQ2PS256,
29634 IX86_BUILTIN_CVTQQ2PS128,
29635 IX86_BUILTIN_CVTUQQ2PS256,
29636 IX86_BUILTIN_CVTUQQ2PS128,
29637 IX86_BUILTIN_CVTQQ2PD256,
29638 IX86_BUILTIN_CVTQQ2PD128,
29639 IX86_BUILTIN_CVTUQQ2PD256,
29640 IX86_BUILTIN_CVTUQQ2PD128,
29641 IX86_BUILTIN_VPERMT2VARQ256,
29642 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29643 IX86_BUILTIN_VPERMT2VARD256,
29644 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29645 IX86_BUILTIN_VPERMI2VARQ256,
29646 IX86_BUILTIN_VPERMI2VARD256,
29647 IX86_BUILTIN_VPERMT2VARPD256,
29648 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29649 IX86_BUILTIN_VPERMT2VARPS256,
29650 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29651 IX86_BUILTIN_VPERMI2VARPD256,
29652 IX86_BUILTIN_VPERMI2VARPS256,
29653 IX86_BUILTIN_VPERMT2VARQ128,
29654 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29655 IX86_BUILTIN_VPERMT2VARD128,
29656 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29657 IX86_BUILTIN_VPERMI2VARQ128,
29658 IX86_BUILTIN_VPERMI2VARD128,
29659 IX86_BUILTIN_VPERMT2VARPD128,
29660 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29661 IX86_BUILTIN_VPERMT2VARPS128,
29662 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29663 IX86_BUILTIN_VPERMI2VARPD128,
29664 IX86_BUILTIN_VPERMI2VARPS128,
29665 IX86_BUILTIN_PSHUFB256_MASK,
29666 IX86_BUILTIN_PSHUFB128_MASK,
29667 IX86_BUILTIN_PSHUFHW256_MASK,
29668 IX86_BUILTIN_PSHUFHW128_MASK,
29669 IX86_BUILTIN_PSHUFLW256_MASK,
29670 IX86_BUILTIN_PSHUFLW128_MASK,
29671 IX86_BUILTIN_PSHUFD256_MASK,
29672 IX86_BUILTIN_PSHUFD128_MASK,
29673 IX86_BUILTIN_SHUFPD256_MASK,
29674 IX86_BUILTIN_SHUFPD128_MASK,
29675 IX86_BUILTIN_SHUFPS256_MASK,
29676 IX86_BUILTIN_SHUFPS128_MASK,
29677 IX86_BUILTIN_PROLVQ256,
29678 IX86_BUILTIN_PROLVQ128,
29679 IX86_BUILTIN_PROLQ256,
29680 IX86_BUILTIN_PROLQ128,
29681 IX86_BUILTIN_PRORVQ256,
29682 IX86_BUILTIN_PRORVQ128,
29683 IX86_BUILTIN_PRORQ256,
29684 IX86_BUILTIN_PRORQ128,
29685 IX86_BUILTIN_PSRAVQ128,
29686 IX86_BUILTIN_PSRAVQ256,
29687 IX86_BUILTIN_PSLLVV4DI_MASK,
29688 IX86_BUILTIN_PSLLVV2DI_MASK,
29689 IX86_BUILTIN_PSLLVV8SI_MASK,
29690 IX86_BUILTIN_PSLLVV4SI_MASK,
29691 IX86_BUILTIN_PSRAVV8SI_MASK,
29692 IX86_BUILTIN_PSRAVV4SI_MASK,
29693 IX86_BUILTIN_PSRLVV4DI_MASK,
29694 IX86_BUILTIN_PSRLVV2DI_MASK,
29695 IX86_BUILTIN_PSRLVV8SI_MASK,
29696 IX86_BUILTIN_PSRLVV4SI_MASK,
29697 IX86_BUILTIN_PSRAWI256_MASK,
29698 IX86_BUILTIN_PSRAW256_MASK,
29699 IX86_BUILTIN_PSRAWI128_MASK,
29700 IX86_BUILTIN_PSRAW128_MASK,
29701 IX86_BUILTIN_PSRLWI256_MASK,
29702 IX86_BUILTIN_PSRLW256_MASK,
29703 IX86_BUILTIN_PSRLWI128_MASK,
29704 IX86_BUILTIN_PSRLW128_MASK,
29705 IX86_BUILTIN_PRORVD256,
29706 IX86_BUILTIN_PROLVD256,
29707 IX86_BUILTIN_PRORD256,
29708 IX86_BUILTIN_PROLD256,
29709 IX86_BUILTIN_PRORVD128,
29710 IX86_BUILTIN_PROLVD128,
29711 IX86_BUILTIN_PRORD128,
29712 IX86_BUILTIN_PROLD128,
29713 IX86_BUILTIN_FPCLASSPD256,
29714 IX86_BUILTIN_FPCLASSPD128,
29715 IX86_BUILTIN_FPCLASSSD,
29716 IX86_BUILTIN_FPCLASSPS256,
29717 IX86_BUILTIN_FPCLASSPS128,
29718 IX86_BUILTIN_FPCLASSSS,
29719 IX86_BUILTIN_CVTB2MASK128,
29720 IX86_BUILTIN_CVTB2MASK256,
29721 IX86_BUILTIN_CVTW2MASK128,
29722 IX86_BUILTIN_CVTW2MASK256,
29723 IX86_BUILTIN_CVTD2MASK128,
29724 IX86_BUILTIN_CVTD2MASK256,
29725 IX86_BUILTIN_CVTQ2MASK128,
29726 IX86_BUILTIN_CVTQ2MASK256,
29727 IX86_BUILTIN_CVTMASK2B128,
29728 IX86_BUILTIN_CVTMASK2B256,
29729 IX86_BUILTIN_CVTMASK2W128,
29730 IX86_BUILTIN_CVTMASK2W256,
29731 IX86_BUILTIN_CVTMASK2D128,
29732 IX86_BUILTIN_CVTMASK2D256,
29733 IX86_BUILTIN_CVTMASK2Q128,
29734 IX86_BUILTIN_CVTMASK2Q256,
29735 IX86_BUILTIN_PCMPEQB128_MASK,
29736 IX86_BUILTIN_PCMPEQB256_MASK,
29737 IX86_BUILTIN_PCMPEQW128_MASK,
29738 IX86_BUILTIN_PCMPEQW256_MASK,
29739 IX86_BUILTIN_PCMPEQD128_MASK,
29740 IX86_BUILTIN_PCMPEQD256_MASK,
29741 IX86_BUILTIN_PCMPEQQ128_MASK,
29742 IX86_BUILTIN_PCMPEQQ256_MASK,
29743 IX86_BUILTIN_PCMPGTB128_MASK,
29744 IX86_BUILTIN_PCMPGTB256_MASK,
29745 IX86_BUILTIN_PCMPGTW128_MASK,
29746 IX86_BUILTIN_PCMPGTW256_MASK,
29747 IX86_BUILTIN_PCMPGTD128_MASK,
29748 IX86_BUILTIN_PCMPGTD256_MASK,
29749 IX86_BUILTIN_PCMPGTQ128_MASK,
29750 IX86_BUILTIN_PCMPGTQ256_MASK,
29751 IX86_BUILTIN_PTESTMB128,
29752 IX86_BUILTIN_PTESTMB256,
29753 IX86_BUILTIN_PTESTMW128,
29754 IX86_BUILTIN_PTESTMW256,
29755 IX86_BUILTIN_PTESTMD128,
29756 IX86_BUILTIN_PTESTMD256,
29757 IX86_BUILTIN_PTESTMQ128,
29758 IX86_BUILTIN_PTESTMQ256,
29759 IX86_BUILTIN_PTESTNMB128,
29760 IX86_BUILTIN_PTESTNMB256,
29761 IX86_BUILTIN_PTESTNMW128,
29762 IX86_BUILTIN_PTESTNMW256,
29763 IX86_BUILTIN_PTESTNMD128,
29764 IX86_BUILTIN_PTESTNMD256,
29765 IX86_BUILTIN_PTESTNMQ128,
29766 IX86_BUILTIN_PTESTNMQ256,
29767 IX86_BUILTIN_PBROADCASTMB128,
29768 IX86_BUILTIN_PBROADCASTMB256,
29769 IX86_BUILTIN_PBROADCASTMW128,
29770 IX86_BUILTIN_PBROADCASTMW256,
29771 IX86_BUILTIN_COMPRESSPD256,
29772 IX86_BUILTIN_COMPRESSPD128,
29773 IX86_BUILTIN_COMPRESSPS256,
29774 IX86_BUILTIN_COMPRESSPS128,
29775 IX86_BUILTIN_PCOMPRESSQ256,
29776 IX86_BUILTIN_PCOMPRESSQ128,
29777 IX86_BUILTIN_PCOMPRESSD256,
29778 IX86_BUILTIN_PCOMPRESSD128,
29779 IX86_BUILTIN_EXPANDPD256,
29780 IX86_BUILTIN_EXPANDPD128,
29781 IX86_BUILTIN_EXPANDPS256,
29782 IX86_BUILTIN_EXPANDPS128,
29783 IX86_BUILTIN_PEXPANDQ256,
29784 IX86_BUILTIN_PEXPANDQ128,
29785 IX86_BUILTIN_PEXPANDD256,
29786 IX86_BUILTIN_PEXPANDD128,
29787 IX86_BUILTIN_EXPANDPD256Z,
29788 IX86_BUILTIN_EXPANDPD128Z,
29789 IX86_BUILTIN_EXPANDPS256Z,
29790 IX86_BUILTIN_EXPANDPS128Z,
29791 IX86_BUILTIN_PEXPANDQ256Z,
29792 IX86_BUILTIN_PEXPANDQ128Z,
29793 IX86_BUILTIN_PEXPANDD256Z,
29794 IX86_BUILTIN_PEXPANDD128Z,
29795 IX86_BUILTIN_PMAXSD256_MASK,
29796 IX86_BUILTIN_PMINSD256_MASK,
29797 IX86_BUILTIN_PMAXUD256_MASK,
29798 IX86_BUILTIN_PMINUD256_MASK,
29799 IX86_BUILTIN_PMAXSD128_MASK,
29800 IX86_BUILTIN_PMINSD128_MASK,
29801 IX86_BUILTIN_PMAXUD128_MASK,
29802 IX86_BUILTIN_PMINUD128_MASK,
29803 IX86_BUILTIN_PMAXSQ256_MASK,
29804 IX86_BUILTIN_PMINSQ256_MASK,
29805 IX86_BUILTIN_PMAXUQ256_MASK,
29806 IX86_BUILTIN_PMINUQ256_MASK,
29807 IX86_BUILTIN_PMAXSQ128_MASK,
29808 IX86_BUILTIN_PMINSQ128_MASK,
29809 IX86_BUILTIN_PMAXUQ128_MASK,
29810 IX86_BUILTIN_PMINUQ128_MASK,
29811 IX86_BUILTIN_PMINSB256_MASK,
29812 IX86_BUILTIN_PMINUB256_MASK,
29813 IX86_BUILTIN_PMAXSB256_MASK,
29814 IX86_BUILTIN_PMAXUB256_MASK,
29815 IX86_BUILTIN_PMINSB128_MASK,
29816 IX86_BUILTIN_PMINUB128_MASK,
29817 IX86_BUILTIN_PMAXSB128_MASK,
29818 IX86_BUILTIN_PMAXUB128_MASK,
29819 IX86_BUILTIN_PMINSW256_MASK,
29820 IX86_BUILTIN_PMINUW256_MASK,
29821 IX86_BUILTIN_PMAXSW256_MASK,
29822 IX86_BUILTIN_PMAXUW256_MASK,
29823 IX86_BUILTIN_PMINSW128_MASK,
29824 IX86_BUILTIN_PMINUW128_MASK,
29825 IX86_BUILTIN_PMAXSW128_MASK,
29826 IX86_BUILTIN_PMAXUW128_MASK,
29827 IX86_BUILTIN_VPCONFLICTQ256,
29828 IX86_BUILTIN_VPCONFLICTD256,
29829 IX86_BUILTIN_VPCLZCNTQ256,
29830 IX86_BUILTIN_VPCLZCNTD256,
29831 IX86_BUILTIN_UNPCKHPD256_MASK,
29832 IX86_BUILTIN_UNPCKHPD128_MASK,
29833 IX86_BUILTIN_UNPCKHPS256_MASK,
29834 IX86_BUILTIN_UNPCKHPS128_MASK,
29835 IX86_BUILTIN_UNPCKLPD256_MASK,
29836 IX86_BUILTIN_UNPCKLPD128_MASK,
29837 IX86_BUILTIN_UNPCKLPS256_MASK,
29838 IX86_BUILTIN_VPCONFLICTQ128,
29839 IX86_BUILTIN_VPCONFLICTD128,
29840 IX86_BUILTIN_VPCLZCNTQ128,
29841 IX86_BUILTIN_VPCLZCNTD128,
29842 IX86_BUILTIN_UNPCKLPS128_MASK,
29843 IX86_BUILTIN_ALIGND256,
29844 IX86_BUILTIN_ALIGNQ256,
29845 IX86_BUILTIN_ALIGND128,
29846 IX86_BUILTIN_ALIGNQ128,
29847 IX86_BUILTIN_CVTPS2PH256_MASK,
29848 IX86_BUILTIN_CVTPS2PH_MASK,
29849 IX86_BUILTIN_CVTPH2PS_MASK,
29850 IX86_BUILTIN_CVTPH2PS256_MASK,
29851 IX86_BUILTIN_PUNPCKHDQ128_MASK,
29852 IX86_BUILTIN_PUNPCKHDQ256_MASK,
29853 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
29854 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
29855 IX86_BUILTIN_PUNPCKLDQ128_MASK,
29856 IX86_BUILTIN_PUNPCKLDQ256_MASK,
29857 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
29858 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
29859 IX86_BUILTIN_PUNPCKHBW128_MASK,
29860 IX86_BUILTIN_PUNPCKHBW256_MASK,
29861 IX86_BUILTIN_PUNPCKHWD128_MASK,
29862 IX86_BUILTIN_PUNPCKHWD256_MASK,
29863 IX86_BUILTIN_PUNPCKLBW128_MASK,
29864 IX86_BUILTIN_PUNPCKLBW256_MASK,
29865 IX86_BUILTIN_PUNPCKLWD128_MASK,
29866 IX86_BUILTIN_PUNPCKLWD256_MASK,
29867 IX86_BUILTIN_PSLLVV16HI,
29868 IX86_BUILTIN_PSLLVV8HI,
29869 IX86_BUILTIN_PACKSSDW256_MASK,
29870 IX86_BUILTIN_PACKSSDW128_MASK,
29871 IX86_BUILTIN_PACKUSDW256_MASK,
29872 IX86_BUILTIN_PACKUSDW128_MASK,
29873 IX86_BUILTIN_PAVGB256_MASK,
29874 IX86_BUILTIN_PAVGW256_MASK,
29875 IX86_BUILTIN_PAVGB128_MASK,
29876 IX86_BUILTIN_PAVGW128_MASK,
29877 IX86_BUILTIN_VPERMVARSF256_MASK,
29878 IX86_BUILTIN_VPERMVARDF256_MASK,
29879 IX86_BUILTIN_VPERMDF256_MASK,
29880 IX86_BUILTIN_PABSB256_MASK,
29881 IX86_BUILTIN_PABSB128_MASK,
29882 IX86_BUILTIN_PABSW256_MASK,
29883 IX86_BUILTIN_PABSW128_MASK,
29884 IX86_BUILTIN_VPERMILVARPD_MASK,
29885 IX86_BUILTIN_VPERMILVARPS_MASK,
29886 IX86_BUILTIN_VPERMILVARPD256_MASK,
29887 IX86_BUILTIN_VPERMILVARPS256_MASK,
29888 IX86_BUILTIN_VPERMILPD_MASK,
29889 IX86_BUILTIN_VPERMILPS_MASK,
29890 IX86_BUILTIN_VPERMILPD256_MASK,
29891 IX86_BUILTIN_VPERMILPS256_MASK,
29892 IX86_BUILTIN_BLENDMQ256,
29893 IX86_BUILTIN_BLENDMD256,
29894 IX86_BUILTIN_BLENDMPD256,
29895 IX86_BUILTIN_BLENDMPS256,
29896 IX86_BUILTIN_BLENDMQ128,
29897 IX86_BUILTIN_BLENDMD128,
29898 IX86_BUILTIN_BLENDMPD128,
29899 IX86_BUILTIN_BLENDMPS128,
29900 IX86_BUILTIN_BLENDMW256,
29901 IX86_BUILTIN_BLENDMB256,
29902 IX86_BUILTIN_BLENDMW128,
29903 IX86_BUILTIN_BLENDMB128,
29904 IX86_BUILTIN_PMULLD256_MASK,
29905 IX86_BUILTIN_PMULLD128_MASK,
29906 IX86_BUILTIN_PMULUDQ256_MASK,
29907 IX86_BUILTIN_PMULDQ256_MASK,
29908 IX86_BUILTIN_PMULDQ128_MASK,
29909 IX86_BUILTIN_PMULUDQ128_MASK,
29910 IX86_BUILTIN_CVTPD2PS256_MASK,
29911 IX86_BUILTIN_CVTPD2PS_MASK,
29912 IX86_BUILTIN_VPERMVARSI256_MASK,
29913 IX86_BUILTIN_VPERMVARDI256_MASK,
29914 IX86_BUILTIN_VPERMDI256_MASK,
29915 IX86_BUILTIN_CMPQ256,
29916 IX86_BUILTIN_CMPD256,
29917 IX86_BUILTIN_UCMPQ256,
29918 IX86_BUILTIN_UCMPD256,
29919 IX86_BUILTIN_CMPB256,
29920 IX86_BUILTIN_CMPW256,
29921 IX86_BUILTIN_UCMPB256,
29922 IX86_BUILTIN_UCMPW256,
29923 IX86_BUILTIN_CMPPD256_MASK,
29924 IX86_BUILTIN_CMPPS256_MASK,
29925 IX86_BUILTIN_CMPQ128,
29926 IX86_BUILTIN_CMPD128,
29927 IX86_BUILTIN_UCMPQ128,
29928 IX86_BUILTIN_UCMPD128,
29929 IX86_BUILTIN_CMPB128,
29930 IX86_BUILTIN_CMPW128,
29931 IX86_BUILTIN_UCMPB128,
29932 IX86_BUILTIN_UCMPW128,
29933 IX86_BUILTIN_CMPPD128_MASK,
29934 IX86_BUILTIN_CMPPS128_MASK,
29936 IX86_BUILTIN_GATHER3SIV8SF,
29937 IX86_BUILTIN_GATHER3SIV4SF,
29938 IX86_BUILTIN_GATHER3SIV4DF,
29939 IX86_BUILTIN_GATHER3SIV2DF,
29940 IX86_BUILTIN_GATHER3DIV8SF,
29941 IX86_BUILTIN_GATHER3DIV4SF,
29942 IX86_BUILTIN_GATHER3DIV4DF,
29943 IX86_BUILTIN_GATHER3DIV2DF,
29944 IX86_BUILTIN_GATHER3SIV8SI,
29945 IX86_BUILTIN_GATHER3SIV4SI,
29946 IX86_BUILTIN_GATHER3SIV4DI,
29947 IX86_BUILTIN_GATHER3SIV2DI,
29948 IX86_BUILTIN_GATHER3DIV8SI,
29949 IX86_BUILTIN_GATHER3DIV4SI,
29950 IX86_BUILTIN_GATHER3DIV4DI,
29951 IX86_BUILTIN_GATHER3DIV2DI,
29952 IX86_BUILTIN_SCATTERSIV8SF,
29953 IX86_BUILTIN_SCATTERSIV4SF,
29954 IX86_BUILTIN_SCATTERSIV4DF,
29955 IX86_BUILTIN_SCATTERSIV2DF,
29956 IX86_BUILTIN_SCATTERDIV8SF,
29957 IX86_BUILTIN_SCATTERDIV4SF,
29958 IX86_BUILTIN_SCATTERDIV4DF,
29959 IX86_BUILTIN_SCATTERDIV2DF,
29960 IX86_BUILTIN_SCATTERSIV8SI,
29961 IX86_BUILTIN_SCATTERSIV4SI,
29962 IX86_BUILTIN_SCATTERSIV4DI,
29963 IX86_BUILTIN_SCATTERSIV2DI,
29964 IX86_BUILTIN_SCATTERDIV8SI,
29965 IX86_BUILTIN_SCATTERDIV4SI,
29966 IX86_BUILTIN_SCATTERDIV4DI,
29967 IX86_BUILTIN_SCATTERDIV2DI,
29969 /* AVX512DQ. */
29970 IX86_BUILTIN_RANGESD128,
29971 IX86_BUILTIN_RANGESS128,
29972 IX86_BUILTIN_KUNPCKWD,
29973 IX86_BUILTIN_KUNPCKDQ,
29974 IX86_BUILTIN_BROADCASTF32x2_512,
29975 IX86_BUILTIN_BROADCASTI32x2_512,
29976 IX86_BUILTIN_BROADCASTF64X2_512,
29977 IX86_BUILTIN_BROADCASTI64X2_512,
29978 IX86_BUILTIN_BROADCASTF32X8_512,
29979 IX86_BUILTIN_BROADCASTI32X8_512,
29980 IX86_BUILTIN_EXTRACTF64X2_512,
29981 IX86_BUILTIN_EXTRACTF32X8,
29982 IX86_BUILTIN_EXTRACTI64X2_512,
29983 IX86_BUILTIN_EXTRACTI32X8,
29984 IX86_BUILTIN_REDUCEPD512_MASK,
29985 IX86_BUILTIN_REDUCEPS512_MASK,
29986 IX86_BUILTIN_PMULLQ512,
29987 IX86_BUILTIN_XORPD512,
29988 IX86_BUILTIN_XORPS512,
29989 IX86_BUILTIN_ORPD512,
29990 IX86_BUILTIN_ORPS512,
29991 IX86_BUILTIN_ANDPD512,
29992 IX86_BUILTIN_ANDPS512,
29993 IX86_BUILTIN_ANDNPD512,
29994 IX86_BUILTIN_ANDNPS512,
29995 IX86_BUILTIN_INSERTF32X8,
29996 IX86_BUILTIN_INSERTI32X8,
29997 IX86_BUILTIN_INSERTF64X2_512,
29998 IX86_BUILTIN_INSERTI64X2_512,
29999 IX86_BUILTIN_FPCLASSPD512,
30000 IX86_BUILTIN_FPCLASSPS512,
30001 IX86_BUILTIN_CVTD2MASK512,
30002 IX86_BUILTIN_CVTQ2MASK512,
30003 IX86_BUILTIN_CVTMASK2D512,
30004 IX86_BUILTIN_CVTMASK2Q512,
30005 IX86_BUILTIN_CVTPD2QQ512,
30006 IX86_BUILTIN_CVTPS2QQ512,
30007 IX86_BUILTIN_CVTPD2UQQ512,
30008 IX86_BUILTIN_CVTPS2UQQ512,
30009 IX86_BUILTIN_CVTQQ2PS512,
30010 IX86_BUILTIN_CVTUQQ2PS512,
30011 IX86_BUILTIN_CVTQQ2PD512,
30012 IX86_BUILTIN_CVTUQQ2PD512,
30013 IX86_BUILTIN_CVTTPS2QQ512,
30014 IX86_BUILTIN_CVTTPS2UQQ512,
30015 IX86_BUILTIN_CVTTPD2QQ512,
30016 IX86_BUILTIN_CVTTPD2UQQ512,
30017 IX86_BUILTIN_RANGEPS512,
30018 IX86_BUILTIN_RANGEPD512,
30020 /* AVX512BW. */
30021 IX86_BUILTIN_PACKUSDW512,
30022 IX86_BUILTIN_PACKSSDW512,
30023 IX86_BUILTIN_LOADDQUHI512_MASK,
30024 IX86_BUILTIN_LOADDQUQI512_MASK,
30025 IX86_BUILTIN_PSLLDQ512,
30026 IX86_BUILTIN_PSRLDQ512,
30027 IX86_BUILTIN_STOREDQUHI512_MASK,
30028 IX86_BUILTIN_STOREDQUQI512_MASK,
30029 IX86_BUILTIN_PALIGNR512,
30030 IX86_BUILTIN_PALIGNR512_MASK,
30031 IX86_BUILTIN_MOVDQUHI512_MASK,
30032 IX86_BUILTIN_MOVDQUQI512_MASK,
30033 IX86_BUILTIN_PSADBW512,
30034 IX86_BUILTIN_DBPSADBW512,
30035 IX86_BUILTIN_PBROADCASTB512,
30036 IX86_BUILTIN_PBROADCASTB512_GPR,
30037 IX86_BUILTIN_PBROADCASTW512,
30038 IX86_BUILTIN_PBROADCASTW512_GPR,
30039 IX86_BUILTIN_PMOVSXBW512_MASK,
30040 IX86_BUILTIN_PMOVZXBW512_MASK,
30041 IX86_BUILTIN_VPERMVARHI512_MASK,
30042 IX86_BUILTIN_VPERMT2VARHI512,
30043 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30044 IX86_BUILTIN_VPERMI2VARHI512,
30045 IX86_BUILTIN_PAVGB512,
30046 IX86_BUILTIN_PAVGW512,
30047 IX86_BUILTIN_PADDB512,
30048 IX86_BUILTIN_PSUBB512,
30049 IX86_BUILTIN_PSUBSB512,
30050 IX86_BUILTIN_PADDSB512,
30051 IX86_BUILTIN_PSUBUSB512,
30052 IX86_BUILTIN_PADDUSB512,
30053 IX86_BUILTIN_PSUBW512,
30054 IX86_BUILTIN_PADDW512,
30055 IX86_BUILTIN_PSUBSW512,
30056 IX86_BUILTIN_PADDSW512,
30057 IX86_BUILTIN_PSUBUSW512,
30058 IX86_BUILTIN_PADDUSW512,
30059 IX86_BUILTIN_PMAXUW512,
30060 IX86_BUILTIN_PMAXSW512,
30061 IX86_BUILTIN_PMINUW512,
30062 IX86_BUILTIN_PMINSW512,
30063 IX86_BUILTIN_PMAXUB512,
30064 IX86_BUILTIN_PMAXSB512,
30065 IX86_BUILTIN_PMINUB512,
30066 IX86_BUILTIN_PMINSB512,
30067 IX86_BUILTIN_PMOVWB512,
30068 IX86_BUILTIN_PMOVSWB512,
30069 IX86_BUILTIN_PMOVUSWB512,
30070 IX86_BUILTIN_PMULHRSW512_MASK,
30071 IX86_BUILTIN_PMULHUW512_MASK,
30072 IX86_BUILTIN_PMULHW512_MASK,
30073 IX86_BUILTIN_PMULLW512_MASK,
30074 IX86_BUILTIN_PSLLWI512_MASK,
30075 IX86_BUILTIN_PSLLW512_MASK,
30076 IX86_BUILTIN_PACKSSWB512,
30077 IX86_BUILTIN_PACKUSWB512,
30078 IX86_BUILTIN_PSRAVV32HI,
30079 IX86_BUILTIN_PMADDUBSW512_MASK,
30080 IX86_BUILTIN_PMADDWD512_MASK,
30081 IX86_BUILTIN_PSRLVV32HI,
30082 IX86_BUILTIN_PUNPCKHBW512,
30083 IX86_BUILTIN_PUNPCKHWD512,
30084 IX86_BUILTIN_PUNPCKLBW512,
30085 IX86_BUILTIN_PUNPCKLWD512,
30086 IX86_BUILTIN_PSHUFB512,
30087 IX86_BUILTIN_PSHUFHW512,
30088 IX86_BUILTIN_PSHUFLW512,
30089 IX86_BUILTIN_PSRAWI512,
30090 IX86_BUILTIN_PSRAW512,
30091 IX86_BUILTIN_PSRLWI512,
30092 IX86_BUILTIN_PSRLW512,
30093 IX86_BUILTIN_CVTB2MASK512,
30094 IX86_BUILTIN_CVTW2MASK512,
30095 IX86_BUILTIN_CVTMASK2B512,
30096 IX86_BUILTIN_CVTMASK2W512,
30097 IX86_BUILTIN_PCMPEQB512_MASK,
30098 IX86_BUILTIN_PCMPEQW512_MASK,
30099 IX86_BUILTIN_PCMPGTB512_MASK,
30100 IX86_BUILTIN_PCMPGTW512_MASK,
30101 IX86_BUILTIN_PTESTMB512,
30102 IX86_BUILTIN_PTESTMW512,
30103 IX86_BUILTIN_PTESTNMB512,
30104 IX86_BUILTIN_PTESTNMW512,
30105 IX86_BUILTIN_PSLLVV32HI,
30106 IX86_BUILTIN_PABSB512,
30107 IX86_BUILTIN_PABSW512,
30108 IX86_BUILTIN_BLENDMW512,
30109 IX86_BUILTIN_BLENDMB512,
30110 IX86_BUILTIN_CMPB512,
30111 IX86_BUILTIN_CMPW512,
30112 IX86_BUILTIN_UCMPB512,
30113 IX86_BUILTIN_UCMPW512,
30115 /* Alternate 4 and 8 element gather/scatter for the vectorizer
30116 where all operands are 32-byte or 64-byte wide respectively. */
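      /* For example, IX86_BUILTIN_GATHERALTSIV4DF is the V4DF gather that
         takes a full-width V8SI index vector instead of the natural V4SI one,
         so the vectorizer can still use a gather when the index and data
         vectors have different element counts.  */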
30117 IX86_BUILTIN_GATHERALTSIV4DF,
30118 IX86_BUILTIN_GATHERALTDIV8SF,
30119 IX86_BUILTIN_GATHERALTSIV4DI,
30120 IX86_BUILTIN_GATHERALTDIV8SI,
30121 IX86_BUILTIN_GATHER3ALTDIV16SF,
30122 IX86_BUILTIN_GATHER3ALTDIV16SI,
30123 IX86_BUILTIN_GATHER3ALTSIV4DF,
30124 IX86_BUILTIN_GATHER3ALTDIV8SF,
30125 IX86_BUILTIN_GATHER3ALTSIV4DI,
30126 IX86_BUILTIN_GATHER3ALTDIV8SI,
30127 IX86_BUILTIN_GATHER3ALTSIV8DF,
30128 IX86_BUILTIN_GATHER3ALTSIV8DI,
30129 IX86_BUILTIN_GATHER3DIV16SF,
30130 IX86_BUILTIN_GATHER3DIV16SI,
30131 IX86_BUILTIN_GATHER3DIV8DF,
30132 IX86_BUILTIN_GATHER3DIV8DI,
30133 IX86_BUILTIN_GATHER3SIV16SF,
30134 IX86_BUILTIN_GATHER3SIV16SI,
30135 IX86_BUILTIN_GATHER3SIV8DF,
30136 IX86_BUILTIN_GATHER3SIV8DI,
30137 IX86_BUILTIN_SCATTERDIV16SF,
30138 IX86_BUILTIN_SCATTERDIV16SI,
30139 IX86_BUILTIN_SCATTERDIV8DF,
30140 IX86_BUILTIN_SCATTERDIV8DI,
30141 IX86_BUILTIN_SCATTERSIV16SF,
30142 IX86_BUILTIN_SCATTERSIV16SI,
30143 IX86_BUILTIN_SCATTERSIV8DF,
30144 IX86_BUILTIN_SCATTERSIV8DI,
30146 /* AVX512PF */
30147 IX86_BUILTIN_GATHERPFQPD,
30148 IX86_BUILTIN_GATHERPFDPS,
30149 IX86_BUILTIN_GATHERPFDPD,
30150 IX86_BUILTIN_GATHERPFQPS,
30151 IX86_BUILTIN_SCATTERPFDPD,
30152 IX86_BUILTIN_SCATTERPFDPS,
30153 IX86_BUILTIN_SCATTERPFQPD,
30154 IX86_BUILTIN_SCATTERPFQPS,
30156 /* AVX-512ER */
30157 IX86_BUILTIN_EXP2PD_MASK,
30158 IX86_BUILTIN_EXP2PS_MASK,
30159 IX86_BUILTIN_EXP2PS,
30160 IX86_BUILTIN_RCP28PD,
30161 IX86_BUILTIN_RCP28PS,
30162 IX86_BUILTIN_RCP28SD,
30163 IX86_BUILTIN_RCP28SS,
30164 IX86_BUILTIN_RSQRT28PD,
30165 IX86_BUILTIN_RSQRT28PS,
30166 IX86_BUILTIN_RSQRT28SD,
30167 IX86_BUILTIN_RSQRT28SS,
30169 /* AVX-512IFMA */
30170 IX86_BUILTIN_VPMADD52LUQ512,
30171 IX86_BUILTIN_VPMADD52HUQ512,
30172 IX86_BUILTIN_VPMADD52LUQ256,
30173 IX86_BUILTIN_VPMADD52HUQ256,
30174 IX86_BUILTIN_VPMADD52LUQ128,
30175 IX86_BUILTIN_VPMADD52HUQ128,
30176 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30177 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30178 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30179 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30180 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30181 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30183 /* AVX-512VBMI */
30184 IX86_BUILTIN_VPMULTISHIFTQB512,
30185 IX86_BUILTIN_VPMULTISHIFTQB256,
30186 IX86_BUILTIN_VPMULTISHIFTQB128,
30187 IX86_BUILTIN_VPERMVARQI512_MASK,
30188 IX86_BUILTIN_VPERMT2VARQI512,
30189 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30190 IX86_BUILTIN_VPERMI2VARQI512,
30191 IX86_BUILTIN_VPERMVARQI256_MASK,
30192 IX86_BUILTIN_VPERMVARQI128_MASK,
30193 IX86_BUILTIN_VPERMT2VARQI256,
30194 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30195 IX86_BUILTIN_VPERMT2VARQI128,
30196 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30197 IX86_BUILTIN_VPERMI2VARQI256,
30198 IX86_BUILTIN_VPERMI2VARQI128,
30200 /* SHA builtins. */
30201 IX86_BUILTIN_SHA1MSG1,
30202 IX86_BUILTIN_SHA1MSG2,
30203 IX86_BUILTIN_SHA1NEXTE,
30204 IX86_BUILTIN_SHA1RNDS4,
30205 IX86_BUILTIN_SHA256MSG1,
30206 IX86_BUILTIN_SHA256MSG2,
30207 IX86_BUILTIN_SHA256RNDS2,
30209 /* CLWB instructions. */
30210 IX86_BUILTIN_CLWB,
30212 /* PCOMMIT instructions. */
30213 IX86_BUILTIN_PCOMMIT,
30215 /* CLFLUSHOPT instructions. */
30216 IX86_BUILTIN_CLFLUSHOPT,
30218 /* TFmode support builtins. */
30219 IX86_BUILTIN_INFQ,
30220 IX86_BUILTIN_HUGE_VALQ,
30221 IX86_BUILTIN_FABSQ,
30222 IX86_BUILTIN_COPYSIGNQ,
30224 /* Vectorizer support builtins. */
30225 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30226 IX86_BUILTIN_CPYSGNPS,
30227 IX86_BUILTIN_CPYSGNPD,
30228 IX86_BUILTIN_CPYSGNPS256,
30229 IX86_BUILTIN_CPYSGNPS512,
30230 IX86_BUILTIN_CPYSGNPD256,
30231 IX86_BUILTIN_CPYSGNPD512,
30232 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30233 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30234 IX86_BUILTIN_ROUNDPD_AWAY_VEC_PACK_SFIX512,
30236 /* FMA4 instructions. */
30237 IX86_BUILTIN_VFMADDSS,
30238 IX86_BUILTIN_VFMADDSD,
30239 IX86_BUILTIN_VFMADDPS,
30240 IX86_BUILTIN_VFMADDPD,
30241 IX86_BUILTIN_VFMADDPS256,
30242 IX86_BUILTIN_VFMADDPD256,
30243 IX86_BUILTIN_VFMADDSUBPS,
30244 IX86_BUILTIN_VFMADDSUBPD,
30245 IX86_BUILTIN_VFMADDSUBPS256,
30246 IX86_BUILTIN_VFMADDSUBPD256,
30248 /* FMA3 instructions. */
30249 IX86_BUILTIN_VFMADDSS3,
30250 IX86_BUILTIN_VFMADDSD3,
30252 /* XOP instructions. */
30253 IX86_BUILTIN_VPCMOV,
30254 IX86_BUILTIN_VPCMOV_V2DI,
30255 IX86_BUILTIN_VPCMOV_V4SI,
30256 IX86_BUILTIN_VPCMOV_V8HI,
30257 IX86_BUILTIN_VPCMOV_V16QI,
30258 IX86_BUILTIN_VPCMOV_V4SF,
30259 IX86_BUILTIN_VPCMOV_V2DF,
30260 IX86_BUILTIN_VPCMOV256,
30261 IX86_BUILTIN_VPCMOV_V4DI256,
30262 IX86_BUILTIN_VPCMOV_V8SI256,
30263 IX86_BUILTIN_VPCMOV_V16HI256,
30264 IX86_BUILTIN_VPCMOV_V32QI256,
30265 IX86_BUILTIN_VPCMOV_V8SF256,
30266 IX86_BUILTIN_VPCMOV_V4DF256,
30268 IX86_BUILTIN_VPPERM,
30270 IX86_BUILTIN_VPMACSSWW,
30271 IX86_BUILTIN_VPMACSWW,
30272 IX86_BUILTIN_VPMACSSWD,
30273 IX86_BUILTIN_VPMACSWD,
30274 IX86_BUILTIN_VPMACSSDD,
30275 IX86_BUILTIN_VPMACSDD,
30276 IX86_BUILTIN_VPMACSSDQL,
30277 IX86_BUILTIN_VPMACSSDQH,
30278 IX86_BUILTIN_VPMACSDQL,
30279 IX86_BUILTIN_VPMACSDQH,
30280 IX86_BUILTIN_VPMADCSSWD,
30281 IX86_BUILTIN_VPMADCSWD,
30283 IX86_BUILTIN_VPHADDBW,
30284 IX86_BUILTIN_VPHADDBD,
30285 IX86_BUILTIN_VPHADDBQ,
30286 IX86_BUILTIN_VPHADDWD,
30287 IX86_BUILTIN_VPHADDWQ,
30288 IX86_BUILTIN_VPHADDDQ,
30289 IX86_BUILTIN_VPHADDUBW,
30290 IX86_BUILTIN_VPHADDUBD,
30291 IX86_BUILTIN_VPHADDUBQ,
30292 IX86_BUILTIN_VPHADDUWD,
30293 IX86_BUILTIN_VPHADDUWQ,
30294 IX86_BUILTIN_VPHADDUDQ,
30295 IX86_BUILTIN_VPHSUBBW,
30296 IX86_BUILTIN_VPHSUBWD,
30297 IX86_BUILTIN_VPHSUBDQ,
30299 IX86_BUILTIN_VPROTB,
30300 IX86_BUILTIN_VPROTW,
30301 IX86_BUILTIN_VPROTD,
30302 IX86_BUILTIN_VPROTQ,
30303 IX86_BUILTIN_VPROTB_IMM,
30304 IX86_BUILTIN_VPROTW_IMM,
30305 IX86_BUILTIN_VPROTD_IMM,
30306 IX86_BUILTIN_VPROTQ_IMM,
30308 IX86_BUILTIN_VPSHLB,
30309 IX86_BUILTIN_VPSHLW,
30310 IX86_BUILTIN_VPSHLD,
30311 IX86_BUILTIN_VPSHLQ,
30312 IX86_BUILTIN_VPSHAB,
30313 IX86_BUILTIN_VPSHAW,
30314 IX86_BUILTIN_VPSHAD,
30315 IX86_BUILTIN_VPSHAQ,
30317 IX86_BUILTIN_VFRCZSS,
30318 IX86_BUILTIN_VFRCZSD,
30319 IX86_BUILTIN_VFRCZPS,
30320 IX86_BUILTIN_VFRCZPD,
30321 IX86_BUILTIN_VFRCZPS256,
30322 IX86_BUILTIN_VFRCZPD256,
30324 IX86_BUILTIN_VPCOMEQUB,
30325 IX86_BUILTIN_VPCOMNEUB,
30326 IX86_BUILTIN_VPCOMLTUB,
30327 IX86_BUILTIN_VPCOMLEUB,
30328 IX86_BUILTIN_VPCOMGTUB,
30329 IX86_BUILTIN_VPCOMGEUB,
30330 IX86_BUILTIN_VPCOMFALSEUB,
30331 IX86_BUILTIN_VPCOMTRUEUB,
30333 IX86_BUILTIN_VPCOMEQUW,
30334 IX86_BUILTIN_VPCOMNEUW,
30335 IX86_BUILTIN_VPCOMLTUW,
30336 IX86_BUILTIN_VPCOMLEUW,
30337 IX86_BUILTIN_VPCOMGTUW,
30338 IX86_BUILTIN_VPCOMGEUW,
30339 IX86_BUILTIN_VPCOMFALSEUW,
30340 IX86_BUILTIN_VPCOMTRUEUW,
30342 IX86_BUILTIN_VPCOMEQUD,
30343 IX86_BUILTIN_VPCOMNEUD,
30344 IX86_BUILTIN_VPCOMLTUD,
30345 IX86_BUILTIN_VPCOMLEUD,
30346 IX86_BUILTIN_VPCOMGTUD,
30347 IX86_BUILTIN_VPCOMGEUD,
30348 IX86_BUILTIN_VPCOMFALSEUD,
30349 IX86_BUILTIN_VPCOMTRUEUD,
30351 IX86_BUILTIN_VPCOMEQUQ,
30352 IX86_BUILTIN_VPCOMNEUQ,
30353 IX86_BUILTIN_VPCOMLTUQ,
30354 IX86_BUILTIN_VPCOMLEUQ,
30355 IX86_BUILTIN_VPCOMGTUQ,
30356 IX86_BUILTIN_VPCOMGEUQ,
30357 IX86_BUILTIN_VPCOMFALSEUQ,
30358 IX86_BUILTIN_VPCOMTRUEUQ,
30360 IX86_BUILTIN_VPCOMEQB,
30361 IX86_BUILTIN_VPCOMNEB,
30362 IX86_BUILTIN_VPCOMLTB,
30363 IX86_BUILTIN_VPCOMLEB,
30364 IX86_BUILTIN_VPCOMGTB,
30365 IX86_BUILTIN_VPCOMGEB,
30366 IX86_BUILTIN_VPCOMFALSEB,
30367 IX86_BUILTIN_VPCOMTRUEB,
30369 IX86_BUILTIN_VPCOMEQW,
30370 IX86_BUILTIN_VPCOMNEW,
30371 IX86_BUILTIN_VPCOMLTW,
30372 IX86_BUILTIN_VPCOMLEW,
30373 IX86_BUILTIN_VPCOMGTW,
30374 IX86_BUILTIN_VPCOMGEW,
30375 IX86_BUILTIN_VPCOMFALSEW,
30376 IX86_BUILTIN_VPCOMTRUEW,
30378 IX86_BUILTIN_VPCOMEQD,
30379 IX86_BUILTIN_VPCOMNED,
30380 IX86_BUILTIN_VPCOMLTD,
30381 IX86_BUILTIN_VPCOMLED,
30382 IX86_BUILTIN_VPCOMGTD,
30383 IX86_BUILTIN_VPCOMGED,
30384 IX86_BUILTIN_VPCOMFALSED,
30385 IX86_BUILTIN_VPCOMTRUED,
30387 IX86_BUILTIN_VPCOMEQQ,
30388 IX86_BUILTIN_VPCOMNEQ,
30389 IX86_BUILTIN_VPCOMLTQ,
30390 IX86_BUILTIN_VPCOMLEQ,
30391 IX86_BUILTIN_VPCOMGTQ,
30392 IX86_BUILTIN_VPCOMGEQ,
30393 IX86_BUILTIN_VPCOMFALSEQ,
30394 IX86_BUILTIN_VPCOMTRUEQ,
30396 /* LWP instructions. */
30397 IX86_BUILTIN_LLWPCB,
30398 IX86_BUILTIN_SLWPCB,
30399 IX86_BUILTIN_LWPVAL32,
30400 IX86_BUILTIN_LWPVAL64,
30401 IX86_BUILTIN_LWPINS32,
30402 IX86_BUILTIN_LWPINS64,
30404 IX86_BUILTIN_CLZS,
30406 /* RTM */
30407 IX86_BUILTIN_XBEGIN,
30408 IX86_BUILTIN_XEND,
30409 IX86_BUILTIN_XABORT,
30410 IX86_BUILTIN_XTEST,
30412 /* MPX */
30413 IX86_BUILTIN_BNDMK,
30414 IX86_BUILTIN_BNDSTX,
30415 IX86_BUILTIN_BNDLDX,
30416 IX86_BUILTIN_BNDCL,
30417 IX86_BUILTIN_BNDCU,
30418 IX86_BUILTIN_BNDRET,
30419 IX86_BUILTIN_BNDNARROW,
30420 IX86_BUILTIN_BNDINT,
30421 IX86_BUILTIN_SIZEOF,
30422 IX86_BUILTIN_BNDLOWER,
30423 IX86_BUILTIN_BNDUPPER,
30425 /* BMI instructions. */
30426 IX86_BUILTIN_BEXTR32,
30427 IX86_BUILTIN_BEXTR64,
30428 IX86_BUILTIN_CTZS,
30430 /* TBM instructions. */
30431 IX86_BUILTIN_BEXTRI32,
30432 IX86_BUILTIN_BEXTRI64,
30434 /* BMI2 instructions. */
30435 IX86_BUILTIN_BZHI32,
30436 IX86_BUILTIN_BZHI64,
30437 IX86_BUILTIN_PDEP32,
30438 IX86_BUILTIN_PDEP64,
30439 IX86_BUILTIN_PEXT32,
30440 IX86_BUILTIN_PEXT64,
30442 /* ADX instructions. */
30443 IX86_BUILTIN_ADDCARRYX32,
30444 IX86_BUILTIN_ADDCARRYX64,
30446 /* SBB instructions. */
30447 IX86_BUILTIN_SBB32,
30448 IX86_BUILTIN_SBB64,
30450 /* FSGSBASE instructions. */
30451 IX86_BUILTIN_RDFSBASE32,
30452 IX86_BUILTIN_RDFSBASE64,
30453 IX86_BUILTIN_RDGSBASE32,
30454 IX86_BUILTIN_RDGSBASE64,
30455 IX86_BUILTIN_WRFSBASE32,
30456 IX86_BUILTIN_WRFSBASE64,
30457 IX86_BUILTIN_WRGSBASE32,
30458 IX86_BUILTIN_WRGSBASE64,
30460 /* RDRND instructions. */
30461 IX86_BUILTIN_RDRAND16_STEP,
30462 IX86_BUILTIN_RDRAND32_STEP,
30463 IX86_BUILTIN_RDRAND64_STEP,
30465 /* RDSEED instructions. */
30466 IX86_BUILTIN_RDSEED16_STEP,
30467 IX86_BUILTIN_RDSEED32_STEP,
30468 IX86_BUILTIN_RDSEED64_STEP,
30470 /* F16C instructions. */
30471 IX86_BUILTIN_CVTPH2PS,
30472 IX86_BUILTIN_CVTPH2PS256,
30473 IX86_BUILTIN_CVTPS2PH,
30474 IX86_BUILTIN_CVTPS2PH256,
30476 /* CFString built-in for darwin */
30477 IX86_BUILTIN_CFSTRING,
30479 /* Builtins to get CPU type and supported features (see the usage sketch just after this enum). */
30480 IX86_BUILTIN_CPU_INIT,
30481 IX86_BUILTIN_CPU_IS,
30482 IX86_BUILTIN_CPU_SUPPORTS,
30484 /* Read/write FLAGS register built-ins. */
30485 IX86_BUILTIN_READ_FLAGS,
30486 IX86_BUILTIN_WRITE_FLAGS,
30488 IX86_BUILTIN_MAX
30489 };
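/* A usage sketch for the three CPU introspection codes above: they back
   the public __builtin_cpu_init / __builtin_cpu_is / __builtin_cpu_supports
   builtins, so user code dispatches roughly like this (run_avx2_path and
   run_generic_path are hypothetical user functions):

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("intel") && __builtin_cpu_supports ("avx2"))
       run_avx2_path ();
     else
       run_generic_path ();  */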
30491 /* Table for the ix86 builtin decls. */
30492 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30494 /* Table of all of the builtin functions that are possible with different ISAs,
30495 but are waiting to be built until a function is declared to use that
30496 ISA. */
30497 struct builtin_isa {
30498 const char *name; /* function name */
30499 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30500 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30501 bool const_p; /* true if the declaration is constant */
30502 bool leaf_p; /* true if the declaration has leaf attribute */
30503 bool nothrow_p; /* true if the declaration has nothrow attribute */
30504 bool set_and_not_built_p;
30505 };
30507 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30509 /* Bits that can still enable any inclusion of a builtin. */
30510 static HOST_WIDE_INT deferred_isa_values = 0;
30512 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
30513 of which isa_flags to use in the ix86_builtins_isa array. Stores the
30514 function decl in the ix86_builtins array. Returns the function decl or
30515 NULL_TREE, if the builtin was not added.
30517 If the front end has a special hook for builtin functions, delay adding
30518 builtin functions that aren't in the current ISA until the ISA is changed
30519 with function specific optimization. Doing so can save about 300K for the
30520 default compiler. When the builtin is expanded, check at that time whether
30521 it is valid.
30523 If the front end doesn't have a special hook, record all builtins, even if
30524 they aren't in the current ISA, in case the user uses
30525 function specific options for a different ISA, so that we don't get scope
30526 errors if a builtin is added in the middle of a function scope. */
30528 static inline tree
30529 def_builtin (HOST_WIDE_INT mask, const char *name,
30530 enum ix86_builtin_func_type tcode,
30531 enum ix86_builtins code)
30532 {
30533 tree decl = NULL_TREE;
30535 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30536 {
30537 ix86_builtins_isa[(int) code].isa = mask;
30539 mask &= ~OPTION_MASK_ISA_64BIT;
30540 if (mask == 0
30541 || (mask & ix86_isa_flags) != 0
30542 || (lang_hooks.builtin_function
30543 == lang_hooks.builtin_function_ext_scope))
30545 {
30546 tree type = ix86_get_builtin_func_type (tcode);
30547 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30548 NULL, NULL_TREE);
30549 ix86_builtins[(int) code] = decl;
30550 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30551 }
30552 else
30553 {
30554 /* Just a MASK where set_and_not_built_p == true can potentially
30555 include a builtin. */
30556 deferred_isa_values |= mask;
30557 ix86_builtins[(int) code] = NULL_TREE;
30558 ix86_builtins_isa[(int) code].tcode = tcode;
30559 ix86_builtins_isa[(int) code].name = name;
30560 ix86_builtins_isa[(int) code].leaf_p = false;
30561 ix86_builtins_isa[(int) code].nothrow_p = false;
30562 ix86_builtins_isa[(int) code].const_p = false;
30563 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30564 }
30565 }
30567 return decl;
30568 }
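/* A minimal sketch of a registration (the identifiers here are hypothetical;
   the real calls are the def_builtin/def_builtin_const invocations made by
   the ix86_init_*_builtins routines later in this file):

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
                  V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_EXAMPLE);

   If SSE2 is already in ix86_isa_flags, or the front end's builtin_function
   hook is the extended-scope one, the decl is created right away and stored
   in ix86_builtins.  Otherwise only the mask, name and type are recorded in
   ix86_builtins_isa, the mask is added to deferred_isa_values, and the decl
   is materialized later by ix86_add_new_builtins.  */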
30570 /* Like def_builtin, but also marks the function decl "const". */
30572 static inline tree
30573 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30574 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30575 {
30576 tree decl = def_builtin (mask, name, tcode, code);
30577 if (decl)
30578 TREE_READONLY (decl) = 1;
30579 else
30580 ix86_builtins_isa[(int) code].const_p = true;
30582 return decl;
30583 }
30585 /* Add any new builtin functions for a given ISA that may not have been
30586 declared. This saves a bit of space compared to adding all of the
30587 declarations to the tree, even if we didn't use them. */
30589 static void
30590 ix86_add_new_builtins (HOST_WIDE_INT isa)
30591 {
30592 if ((isa & deferred_isa_values) == 0)
30593 return;
30595 /* Bits in ISA value can be removed from potential isa values. */
30596 deferred_isa_values &= ~isa;
30598 int i;
30599 tree saved_current_target_pragma = current_target_pragma;
30600 current_target_pragma = NULL_TREE;
30602 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30603 {
30604 if ((ix86_builtins_isa[i].isa & isa) != 0
30605 && ix86_builtins_isa[i].set_and_not_built_p)
30606 {
30607 tree decl, type;
30609 /* Don't define the builtin again. */
30610 ix86_builtins_isa[i].set_and_not_built_p = false;
30612 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30613 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30614 type, i, BUILT_IN_MD, NULL,
30615 NULL_TREE);
30617 ix86_builtins[i] = decl;
30618 if (ix86_builtins_isa[i].const_p)
30619 TREE_READONLY (decl) = 1;
30620 if (ix86_builtins_isa[i].leaf_p)
30621 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30622 NULL_TREE);
30623 if (ix86_builtins_isa[i].nothrow_p)
30624 TREE_NOTHROW (decl) = 1;
30625 }
30626 }
30628 current_target_pragma = saved_current_target_pragma;
30629 }
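/* Sketch of when this runs (the actual call sites are in the target
   attribute/pragma handling elsewhere in this file): compiling, say,

     __attribute__ ((target ("avx512bw")))
     void f (void);

   enables extra ISA bits for f, and those newly enabled bits are passed to
   ix86_add_new_builtins so that any builtins deferred by def_builtin become
   declared before f's body can expand them.  */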
30631 /* Bits for builtin_description.flag. */
30633 /* Set when we don't support the comparison natively, and should
30634 swap_comparison in order to support it. */
30635 #define BUILTIN_DESC_SWAP_OPERANDS 1
30637 struct builtin_description
30638 {
30639 const HOST_WIDE_INT mask;
30640 const enum insn_code icode;
30641 const char *const name;
30642 const enum ix86_builtins code;
30643 const enum rtx_code comparison;
30644 const int flag;
30645 };
30647 static const struct builtin_description bdesc_comi[] =
30648 {
30649 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30650 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30651 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30652 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30653 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30654 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30655 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30656 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30657 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30658 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30659 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30660 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30661 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30662 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30663 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30664 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30665 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30666 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30667 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30668 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30669 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30670 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30671 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30672 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
30673 };
30675 static const struct builtin_description bdesc_pcmpestr[] =
30676 {
30677 /* SSE4.2 */
30678 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30679 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30680 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30681 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30682 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30683 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30684 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30685 };
30687 static const struct builtin_description bdesc_pcmpistr[] =
30688 {
30689 /* SSE4.2 */
30690 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30691 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30692 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30693 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30694 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30695 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30696 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30697 };
30699 /* Special builtins with variable number of arguments. */
30700 static const struct builtin_description bdesc_special_args[] =
30701 {
30702 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30703 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30704 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30706 /* 80387 (for use internally for atomic compound assignment). */
30707 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30708 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30709 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30710 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30712 /* MMX */
30713 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30715 /* 3DNow! */
30716 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30718 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30719 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30720 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30721 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30722 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30723 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30724 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30725 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30726 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30728 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30729 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30730 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30731 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30732 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30733 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30734 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30735 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30737 /* SSE */
30738 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30739 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30740 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30742 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30743 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30744 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30745 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30747 /* SSE or 3DNow!A */
30748 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30749 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30751 /* SSE2 */
30752 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30753 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30754 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30755 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30756 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30757 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30758 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30759 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30760 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30761 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30763 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30764 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30766 /* SSE3 */
30767 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30769 /* SSE4.1 */
30770 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30772 /* SSE4A */
30773 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30774 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30776 /* AVX */
30777 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30780 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30781 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30782 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30784 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30786 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30787 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30788 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30789 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30790 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30792 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30794 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30798 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
30799 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
30800 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
30801 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
30802 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
30803 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
30804 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
30805 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
30807 /* AVX2 */
30808 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
30809 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
30810 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
30811 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
30812 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
30813 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
30814 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
30815 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
30816 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
30818 /* AVX512F */
30819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
30840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
30841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
30842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
30843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30867 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
30868 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
30869 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
30870 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
30871 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
30872 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
30874 /* FSGSBASE */
30875 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30876 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30877 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30878 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30879 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30880 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30881 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30882 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30884 /* RTM */
30885 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30886 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
30887 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
30889 /* AVX512BW */
30890 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
30891 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
30892 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
30893 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
30895 /* AVX512VL */
30896 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
30897 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
30898 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
30899 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
30900 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30901 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30902 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30903 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30904 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30905 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30906 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30907 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30908 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30909 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30910 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30911 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30912 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30913 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30914 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30915 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30916 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30917 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30918 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30919 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30920 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30921 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30922 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30923 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30924 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30925 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30926 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30927 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30928 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30929 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30930 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30931 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30932 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
30933 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
30934 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
30935 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
30936 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30937 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30938 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30939 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30940 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30941 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30942 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30943 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30944 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30945 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30946 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30947 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30948 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30949 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30950 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30951 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30952 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30953 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30954 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30955 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30956 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30957 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30958 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30959 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30960 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30961 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30962 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30963 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30964 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30965 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30966 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30967 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30968 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30969 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30970 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30971 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30972 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30973 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30974 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30975 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30976 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30977 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30978 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30979 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30980 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30981 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30982 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30983 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30984 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30985 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30986 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30987 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30988 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30989 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30991 /* PCOMMIT. */
30992 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
30993 };
30995 /* Builtins with variable number of arguments. */
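/* Each entry below supplies, in order: the ISA option mask that gates the
   builtin, the insn pattern (CODE_FOR_*), the user-visible builtin name, the
   IX86_BUILTIN_* code, an rtx comparison code used by the expander (UNKNOWN
   when unused), and the prototype as an ix86_builtin_func_type cast to int.  */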
30996 static const struct builtin_description bdesc_args[] =
30997 {
30998 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
30999 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
31000 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
31001 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31002 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31003 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31004 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31006 /* MMX */
31007 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31008 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31009 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31010 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31011 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31012 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31014 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31015 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31016 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31017 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31018 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31019 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31020 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31021 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31023 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31024 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31026 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31027 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31028 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31029 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31031 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31032 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31033 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31034 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31035 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31036 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31038 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31039 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31040 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31041 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31042 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31043 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31045 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31046 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
31047 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31049 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
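/* MMX shift builtins: the *_SI_COUNT prototypes take the shift count as a
   plain integer (the psllwi/pslldi/... immediate-count forms), while the
   *_V*_COUNT prototypes take the count in a vector register (psllw/pslld/...).  */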
31051 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31052 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31053 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31054 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31055 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31056 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31058 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31059 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31060 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31061 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31062 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31063 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31065 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31066 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31067 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31068 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31070 /* 3DNow! */
31071 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31072 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31073 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31074 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31076 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31077 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31078 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31079 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31080 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31081 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31082 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31083 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31084 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31085 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31086 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31087 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31088 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31089 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31090 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31092 /* 3DNow!A */
31093 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31094 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31095 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31096 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31097 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31098 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31100 /* SSE */
31101 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31102 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31103 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31104 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31105 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31106 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31107 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31108 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31109 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31110 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31111 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31112 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31114 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31116 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31117 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31118 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31119 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31120 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31121 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31122 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31123 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
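/* Compare builtins: the rtx_code column gives the condition to expand; the
   *_SWAP prototypes swap the two operands, so e.g. cmpgtps is emitted as the
   LT comparison with its arguments reversed.  */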
31125 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31126 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31127 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31128 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31129 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31130 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31131 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31132 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31133 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31134 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31135 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31136 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31137 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31138 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31139 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31140 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31141 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31142 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31143 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31144 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31146 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31147 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31148 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31149 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31151 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31152 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31153 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31154 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31156 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31158 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31159 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31160 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31161 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31162 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31164 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31165 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31166 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
31168 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31170 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31171 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31172 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31174 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31175 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
31177 /* SSE MMX or 3DNow!A */
31178 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31179 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31180 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31182 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31183 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31184 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31185 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31187 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31188 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31190 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31192 /* SSE2 */
31193 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31195 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31196 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31197 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31198 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31199 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31201 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31202 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31203 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31204 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31205 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31207 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31209 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31210 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31211 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31212 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31214 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31215 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31216 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31218 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31219 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31220 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31221 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31222 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31223 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31224 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31225 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31227 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31228 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31229 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31231 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31232 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31233 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31234 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31235 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31236 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31237 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31238 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31239 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31240 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31241 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31242 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31243 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31244 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31245 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31246 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31248 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31249 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31250 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31251 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31253 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31254 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31255 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31256 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31258 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31260 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31261 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31262 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31264 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31266 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31267 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31268 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31269 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31270 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31271 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31272 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31273 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31275 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31276 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31277 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31278 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31279 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31280 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31281 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31282 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31284 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31285 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31287 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31288 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31289 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31290 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31292 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31293 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31295 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31296 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31297 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31298 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31299 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31300 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31302 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31303 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31304 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31305 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31307 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31308 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31309 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31310 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31311 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31312 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31313 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31314 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31316 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31317 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31320 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31324 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31326 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31328 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31329 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31330 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31331 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
31333 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31334 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31335 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31336 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31337 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31338 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31339 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31341 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31342 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31343 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31344 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31345 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31346 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31347 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31349 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31350 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31351 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31352 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31354 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31355 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31356 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31358 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31360 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31362 /* SSE2 MMX */
31363 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31364 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31366 /* SSE3 */
31367 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31368 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31370 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31371 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31372 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31373 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31374 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31375 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31377 /* SSSE3 */
31378 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31379 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31380 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31381 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31382 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31383 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31385 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31386 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31387 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31388 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31389 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31390 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31391 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31392 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31393 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31394 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31395 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31396 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31397 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31398 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31399 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31400 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31401 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31402 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31403 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31404 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31405 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31406 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31407 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31408 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31410 /* SSSE3. */
31411 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31412 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
31414 /* SSE4.1 */
31415 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31416 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31417 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31418 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31419 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31420 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31421 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31422 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31423 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31424 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31426 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31427 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31428 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31429 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31430 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31431 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31432 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31433 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31434 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31435 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31436 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31437 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31438 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31440 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31441 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31442 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31443 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31444 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31445 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31446 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31447 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31448 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31449 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31450 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31451 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31453 /* SSE4.1 */
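/* The rows below are gated on OPTION_MASK_ISA_ROUND.  For the *_ROUND
   prototypes the comparison slot carries the rounding mode (ROUND_FLOOR,
   ROUND_CEIL, ROUND_TRUNC or ROUND_MXCSR) instead of an rtx comparison;
   for the *_PTEST prototypes it selects the flag being tested:
   EQ -> ZF (ptestz), LTU -> CF (ptestc), GTU -> neither (ptestnzc).  */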
31454 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31455 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31456 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31457 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31459 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31460 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31461 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31462 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31464 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31465 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31467 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31468 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31470 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31471 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31472 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31473 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31475 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31476 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31478 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31479 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31481 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31482 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31483 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31485 /* SSE4.2 */
31486 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31487 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31488 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31489 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31490 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31492 /* SSE4A */
31493 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31494 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31495 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31496 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31498 /* AES */
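/* The AES rows here and the PCLMUL row below carry a null name: those
   builtins are declared separately under their own ISA masks, so these
   entries are only used when the builtins are expanded.  */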
31499 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31500 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31502 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31503 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31504 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31505 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31507 /* PCLMUL */
31508 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
31510 /* AVX */
31511 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31512 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31513 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31514 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31515 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31516 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31517 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31518 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31519 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31520 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31521 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31522 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31523 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31524 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31525 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31526 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31527 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31528 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31529 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31530 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31531 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31532 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31533 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31534 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31535 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31536 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31538 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31539 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31540 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31541 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31543 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31544 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31545 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31546 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31547 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31548 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31549 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31550 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31551 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31552 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31553 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31554 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31555 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31556 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31557 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31558 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31559 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31560 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31561 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31562 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31563 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31564 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31565 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31566 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31567 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31568 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31569 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31570 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31571 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31572 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31573 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31574 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31575 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31576 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31578 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31579 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31580 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31582 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31583 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31584 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31585 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31586 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31588 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31590 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31591 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31593 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31594 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31595 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31596 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31598 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31599 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31601 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31602 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31604 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31605 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31606 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31607 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31609 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31610 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31612 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31613 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31615 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31616 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31617 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31618 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31620 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31621 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31622 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31623 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31624 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31625 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31627 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31628 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31629 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31630 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31631 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31632 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31633 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31634 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31635 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31636 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31637 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31638 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31639 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31640 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31641 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31643 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31644 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31646 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31647 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31649 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31651 /* AVX2 */
31652 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31653 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31654 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31655 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31656 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31657 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31658 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31659 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31660 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31661 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31662 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31663 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31664 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31665 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31666 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31667 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31668 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31669 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31670 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31671 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31672 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31673 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31674 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31675 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31676 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31677 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31678 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31679 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31680 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31681 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31682 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31683 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31684 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31685 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31686 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31687 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31688 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31689 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31690 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31691 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31692 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31693 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31694 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31695 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31696 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31697 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31698 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31699 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31700 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31701 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31702 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31703 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31704 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31705 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31706 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31707 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31708 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31709 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31710 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31711 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31712 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31713 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31714 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31715 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31716 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31717 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31718 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31719 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31720 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31721 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31722 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31723 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31724 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31725 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31726 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31727 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31728 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31729 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31730 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31731 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31732 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31733 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31734 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31735 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31736 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31737 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31738 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31739 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31740 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31741 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31742 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31743 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31744 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31745 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31746 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31747 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31748 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31749 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31750 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31751 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31752 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31753 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31754 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31755 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31756 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31757 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31758 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31759 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31760 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31761 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31762 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31763 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31764 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31765 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31766 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31767 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31768 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31769 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31770 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31771 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31772 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31773 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31774 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31775 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31776 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31777 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31778 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31779 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31780 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31781 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31782 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31783 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31784 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31785 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31786 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31787 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31788 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31789 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31790 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31791 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31792 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31793 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31794 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31795 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31796 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31797 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
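/* The AVX2 shift rows above come in pairs sharing one expander pattern:
   a *_SI_COUNT / *_INT_COUNT prototype whose count is a scalar or
   immediate (e.g. __builtin_ia32_psllwi256), and a *_V8HI_COUNT /
   *_V4SI_COUNT / *_V2DI_COUNT prototype whose count is passed in an XMM
   operand (e.g. __builtin_ia32_psllw256).  */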
31799 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31801 /* BMI */
31802 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31803 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31804 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31806 /* TBM */
31807 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31808 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31810 /* F16C */
31811 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
31812 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
31813 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
31814 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
31816 /* BMI2 */
31817 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31818 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31819 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31820 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31821 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31822 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
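/* In the AVX512F rows below, prototypes ending in _QI or _HI take a
   write-mask as the final argument (QImode for 8-element, HImode for
   16-element vectors); the masked variants typically also carry an extra
   vector argument of the result mode holding the values used for
   masked-off lanes.  */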
31824 /* AVX512F */
31825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
31826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
31827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
31828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
31829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
31830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
31831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
31839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
31841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
31842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
31849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
31851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
31856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
31857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
31858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
31859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
31860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
31861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
31862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
31863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31868 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31869 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31870 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31871 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
31880 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
31881 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
31882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
31883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31913 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31991 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31992 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31993 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31994 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
32023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
32024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
32025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32026 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
32028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32031 /* Mask arithmetic operations.  */
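/* These builtins take and return 16-bit mask values (HImode) held in the
   AVX-512 opmask registers: e.g. __builtin_ia32_kandhi is the bitwise AND of
   two masks, and __builtin_ia32_kortestchi / __builtin_ia32_kortestzhi return
   the carry (all-ones) and zero results of KORTESTW respectively.  */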
32032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
32035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
32039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
32043 /* SHA.  */
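/* These entries map to the Intel SHA extension instructions (sha1msg1,
   sha1nexte, sha1rnds4, sha256rnds2, ...).  All operate on 128-bit vectors
   viewed as V4SI; sha1rnds4 additionally takes an immediate round selector,
   hence its V4SI_FTYPE_V4SI_V4SI_INT signature.  */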
32044 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32045 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32046 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32047 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32048 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32049 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32050 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
32052 /* AVX512VL. */
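/* 128-bit and 256-bit masked forms of the AVX-512 operations above.  Each
   FTYPE carries the extra merge-source vector operand plus a QI/HI/SI
   write-mask, and entries that also require AVX512BW or AVX512DQ OR that ISA
   bit into the mask field alongside OPTION_MASK_ISA_AVX512VL.  */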
32053 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
32054 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
32055 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32056 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32057 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32060 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32061 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32063 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32064 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32065 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32066 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32067 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32068 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32069 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32070 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32071 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32072 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32073 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32074 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32075 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32076 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32077 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32078 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32079 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32080 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32081 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32082 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32083 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32084 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32085 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32086 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32087 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32088 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32089 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32090 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32091 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32092 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32093 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32094 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32095 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32096 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32097 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32098 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32099 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32100 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32101 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32102 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32103 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32104 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32105 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32106 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32107 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32108 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32109 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
32110 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32111 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32112 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32113 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32114 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32115 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32116 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32117 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32118 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32119 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32120 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32121 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32122 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32123 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32124 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32125 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32126 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32127 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32128 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32129 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32130 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32131 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32132 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32133 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32134 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32135 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32136 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32137 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32138 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32139 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32140 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32141 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32142 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32143 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
32144 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32145 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32146 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32147 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32148 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32149 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32150 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32151 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32152 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32153 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32154 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32155 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32156 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32158 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32159 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32160 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32161 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32162 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32163 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32164 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
32167 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32168 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32169 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32170 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32174 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32175 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32176 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32177 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32178 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32179 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32180 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32189 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32191 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32192 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32193 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32194 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32195 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32196 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32197 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32198 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32199 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32200 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32201 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32202 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32203 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32204 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32205 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32206 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32207 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32208 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32209 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32210 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32211 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32212 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32213 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32214 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32217 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32218 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32219 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32220 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32221 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32222 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32223 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32224 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32225 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32226 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32227 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32228 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32229 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32230 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32231 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32232 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32233 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32234 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32235 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32236 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32237 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32238 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32239 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32240 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32241 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32242 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32243 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32245 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32246 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32247 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32248 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32253 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32254 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32255 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32256 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32257 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32258 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32259 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32264 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32269 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32275 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32276 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32277 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32278 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32279 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32281 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32285 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32288 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32289 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32290 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32291 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32292 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32293 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32294 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32295 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32296 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32299 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32300 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32301 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32302 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32303 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32304 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32305 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32306 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32307 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32308 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32309 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32310 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32311 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32312 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32313 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32314 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32315 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32316 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32317 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32318 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32319 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32320 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32321 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32322 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32323 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32324 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32325 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32326 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32327 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32330 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32333 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32334 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32339 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32342 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32343 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32346 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32347 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32348 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32349 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32352 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32353 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32354 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32355 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32358 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32359 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32360 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32361 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32362 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32363 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32364 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32365 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32366 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32370 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32371 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32372 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32373 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32374 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32401 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32404 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32405 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32406 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32407 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32417 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32418 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32420 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32424 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32425 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32426 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32428 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32429 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32430 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32431 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32435 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32436 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32437 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32438 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32439 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32440 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32441 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32442 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32443 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32444 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32449 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32450 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32451 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32452 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32456 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32457 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32458 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32463 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32464 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32465 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32466 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32467 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32468 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32469 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32470 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32471 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32472 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32473 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32474 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32475 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32479 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32480 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32481 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32482 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32483 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32484 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32485 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32486 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32487 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32494 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32495 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32496 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32497 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32498 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32499 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32500 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32503 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32504 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32505 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32509 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32510 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32511 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32512 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32513 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32515 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32518 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32519 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32520 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32521 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32525 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32526 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32527 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32528 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32529 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32530 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32531 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32532 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32533 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32534 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32539 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32541 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32543 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32544 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32545 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32546 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32547 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32548 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32549 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32550 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32551 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32552 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32553 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32554 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32555 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32556 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32557 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32558 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32559 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32560 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32561 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32562 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32563 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32564 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32565 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32566 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32567 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32568 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32571 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32572 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32573 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32574 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32575 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32576 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32577 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32578 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32579 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32580 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32581 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32582 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32583 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32584 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32586 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32587 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32588 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32589 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32590 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32591 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32592 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32593 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32595 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32596 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32597 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32598 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32599 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32600 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32603 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32604 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32606 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32607 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32608 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32609 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32610 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32611 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32612 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32613 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32614 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32616 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32617 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32618 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32619 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32620 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32621 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32622 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32623 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32624 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32625 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32626 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32627 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32628 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32629 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32630 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32631 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32632 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32633 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32634 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32635 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32636 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32637 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32638 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32639 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32640 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32641 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32642 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32643 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32644 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32645 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32646 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32647 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32648 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32649 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32650 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32651 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32652 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32653 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32654 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32655 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32656 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32657 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32658 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32659 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32660 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32661 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32662 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32663 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32664 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32665 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32666 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32667 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32668 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32669 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32670 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32671 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32672 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32673 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32674 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32675 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32676 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32677 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32678 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32679 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32680 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32681 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32682 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32683 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32684 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32685 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32686 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32687 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32688 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32689 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32690 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32691 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32692 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32693 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32694 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32695 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32696 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32697 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32698 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32699 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32700 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32701 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32702 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32703 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32704 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32705 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32706 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32707 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32708 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32709 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32710 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32711 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32712 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32713 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32714 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32716 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32717 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32718 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32719 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32720 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32721 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32722 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32723 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32724 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32725 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32726 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32727 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32728 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32729 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32730 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32731 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32732 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32733 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32734 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32735 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32736 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32737 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32738 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32739 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32740 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32741 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32742 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32743 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32744 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32745 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32746 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32747 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32748 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32749 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32750 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32751 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32752 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32753 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
32754 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
32755 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32756 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32757 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32758 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32759 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32760 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32761 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32762 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32763 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
32764 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
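  /* Each row above follows the builtin_description layout used for this
     whole table: ISA option mask, insn code, builtin name, IX86_BUILTIN_*
     enum value, comparison code (UNKNOWN when the expander does not need
     one) and the prototype enum.  As a hedged illustration only, a header
     wrapper over one of the masked compare rows is expected to look
     roughly like the usual avx512vlintrin.h style below; the installed
     header is the authoritative version and may differ in detail:

       extern __inline __mmask8
       __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
       _mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P)
       {
         return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
                                                          (__v2df) __Y, __P,
                                                          (__mmask8) -1);
       }

     The trailing all-ones mask matches the QI_FTYPE_V2DF_V2DF_INT_QI
     prototype registered for __builtin_ia32_cmppd128_mask above.  */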
32766 /* AVX512DQ. */
32767 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32768 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32769 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32770 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32771 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
32772 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
32773 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
32774 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
32775 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
32776 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
32777 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32778 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32779 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32780 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32781 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32782 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32783 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32784 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32785 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32786 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32787 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32788 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
32789 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
32790 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
32791 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
32792 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
32793 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
32794 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
32795 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
32796 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
32797 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
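  /* A similarly hedged sketch for the AVX512DQ rows just above: the
     unmasked user-level intrinsic built on top of a *_mask row passes an
     all-ones mask and a don't-care merge operand, e.g. (following the
     usual avx512dqintrin.h pattern; the installed header may differ):

       extern __inline __m512d
       __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
       _mm512_xor_pd (__m512d __A, __m512d __B)
       {
         return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
                                                         (__v8df) __B,
                                                         (__v8df)
                                                         _mm512_setzero_pd (),
                                                         (__mmask8) -1);
       }

     which is consistent with the V8DF_FTYPE_V8DF_V8DF_V8DF_QI prototype
     registered for __builtin_ia32_xorpd512_mask above.  */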
32799 /* AVX512BW. */
32800 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
32801 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
32802 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32803 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32804 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32805 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32806 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
32807 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
32808 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32809 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32810 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
32811 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
32812 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
32813 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
32814 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
32815 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
32816 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32817 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32818 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32819 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32820 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32821 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32822 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32823 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32824 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32825 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32826 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32827 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32828 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32829 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32830 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32831 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32832 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32833 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32834 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32835 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32836 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32837 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32838 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32839 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32840 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32841 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32842 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32843 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32844 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32845 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32846 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32847 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32848 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32849 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32850 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32851 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32852 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32853 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32854 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32855 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32856 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
32857 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
32858 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32859 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32860 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32861 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32862 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32863 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32864 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32865 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32866 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32867 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32868 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32869 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32870 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
32871 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
32872 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
32873 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
32874 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32875 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32876 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32877 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32878 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32879 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32880 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32881 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32882 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32883 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32884 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32885 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32886 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32887 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32888 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32889 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32890 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32892 /* AVX512IFMA */
32893 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32894 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32895 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32896 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32897 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32898 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32899 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32900 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32901 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32902 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32903 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32904 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32906 /* AVX512VBMI */
32907 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32908 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32909 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32910 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32911 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32912 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32913 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32914 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32915 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32916 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32917 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32918 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32919 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32920 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32921 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32922 };
32924 /* Builtins with rounding support. */
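/* Each entry below ties an ISA option mask and an insn pattern to the
   builtin's name, its IX86_BUILTIN_* code and its function-type code; for
   these "round" builtins the trailing _INT in the type is the immediate
   that carries the embedded rounding mode / SAE control.  As a rough
   illustration of how such an entry is reached from user code (simplified
   from what avx512fintrin.h does, not a verbatim copy of that header;
   add_round_example is just a made-up wrapper), __builtin_ia32_addpd512_mask
   is what an intrinsic such as _mm512_add_round_pd ultimately expands to:

     __m512d
     add_round_example (__m512d a, __m512d b)
     {
       return (__m512d) __builtin_ia32_addpd512_mask
         ((__v8df) a, (__v8df) b,
          (__v8df) a,            /* merge source, ignored with an all-ones mask  */
          (__mmask8) -1,         /* no write-masking  */
          _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
     }
*/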
32925 static const struct builtin_description bdesc_round_args[] =
32926 {
32927 /* AVX512F */
32928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
32933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
32934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
32935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
32936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
32937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
32938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
32941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
32943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
32945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
32947 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
32948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
32949 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
32950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
32951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32956 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
32957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
32958 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
32959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33008 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33010 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33012 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33014 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33016 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33018 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33020 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33022 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
33030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
33031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33048 /* AVX512ER */
33049 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33050 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33051 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33052 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33053 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33054 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33055 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33056 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33057 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33058 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33060 /* AVX512DQ. */
33061 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33062 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33063 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33064 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33065 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33066 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33067 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33068 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33069 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33070 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33071 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33072 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33073 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33074 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33075 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33076 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
33077 };
33079 /* Builtins for MPX. */
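/* Note that the MPX entries below use (enum insn_code) 0 rather than a
   CODE_FOR_* pattern: there is no single insn behind them, the intent being
   that ix86_expand_builtin recognizes the IX86_BUILTIN_BND* codes and
   expands them by hand instead of going through the generic table-driven
   expansion.  */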
33080 static const struct builtin_description bdesc_mpx[] =
33081 {
33082 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33083 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33084 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33085 };
33087 /* Const builtins for MPX. */
33088 static const struct builtin_description bdesc_mpx_const[] =
33089 {
33090 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33091 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33092 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33093 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33094 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33095 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33096 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33097 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
33098 };
33100 /* FMA4 and XOP. */
33101 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33102 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33103 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33104 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33105 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
33106 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
33107 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
33108 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
33109 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
33110 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
33111 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
33112 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
33113 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
33114 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
33115 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
33116 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
33117 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
33118 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
33119 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
33120 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
33121 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
33122 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
33123 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
33124 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
33125 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
33126 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
33127 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
33128 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
33129 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
33130 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
33131 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
33132 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
33133 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
33134 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
33135 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
33136 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
33137 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
33138 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
33139 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
33140 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
33141 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
33142 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
33143 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
33144 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
33145 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
33146 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
33147 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
33148 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
33149 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
33150 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
33151 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
33152 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
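/* The MULTI_ARG_<nargs>_<type>* names above are simply readable aliases for
   the generic *_FTYPE_* codes used by the FMA4/XOP table that follows: the
   leading number is the operand count, the rest names the element type, and
   the suffixes distinguish the variants (_IMM for an immediate operand,
   _CMP/_TF for the comparison forms).  For instance, MULTI_ARG_2_DI_CMP is
   just another name for V2DI_FTYPE_V2DI_V2DI_CMP.  */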
33154 static const struct builtin_description bdesc_multi_arg[] =
33155 {
33156 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33157 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33158 UNKNOWN, (int)MULTI_ARG_3_SF },
33159 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33160 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33161 UNKNOWN, (int)MULTI_ARG_3_DF },
33163 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33164 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33165 UNKNOWN, (int)MULTI_ARG_3_SF },
33166 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33167 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33168 UNKNOWN, (int)MULTI_ARG_3_DF },
33170 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33171 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33172 UNKNOWN, (int)MULTI_ARG_3_SF },
33173 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33174 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33175 UNKNOWN, (int)MULTI_ARG_3_DF },
33176 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33177 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33178 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33179 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33180 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33181 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33183 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33184 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33185 UNKNOWN, (int)MULTI_ARG_3_SF },
33186 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33187 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33188 UNKNOWN, (int)MULTI_ARG_3_DF },
33189 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33190 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33191 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33192 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33193 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33194 UNKNOWN, (int)MULTI_ARG_3_DF2 },
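/* VPCMOV is a plain bitwise select: each result bit is taken from the first
   source where the corresponding bit of the third (selector) operand is set,
   and from the second source where it is clear; that is why the same
   operation is exposed below at every element width even though it is purely
   bitwise.  */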
33196 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33197 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33198 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33199 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33200 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
33201 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33202 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33204 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33205 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33206 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33207 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33208 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33209 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33210 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33212 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33214 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33215 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33216 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33217 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33218 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33219 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33220 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33221 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33222 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33223 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33224 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33225 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33227 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33228 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33229 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33230 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33231 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33232 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33233 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33234 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33235 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33236 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33237 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33238 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33239 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33240 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33241 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33242 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33244 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33245 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33246 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33247 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33248 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33249 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33251 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33252 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33253 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33254 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33255 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33256 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33257 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33258 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33259 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33260 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33261 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33262 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33263 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33264 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33265 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33267 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33268 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33269 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33270 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33271 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33272 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33273 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33275 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33276 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33277 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33278 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33279 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33280 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33281 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33283 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33284 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33285 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33286 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33287 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33288 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33289 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33291 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33292 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33293 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33294 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33295 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33296 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33297 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33299 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33300 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33301 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33302 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33303 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33304 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33305 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33307 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33308 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33309 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33310 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33311 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33312 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33313 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33315 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33316 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33317 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33318 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33319 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33320 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33321 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33323 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33324 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33325 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33326 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33327 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33328 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33329 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33331 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33332 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33333 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33334 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33335 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33336 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33337 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33338 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33340 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33341 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33342 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33343 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33344 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33345 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33346 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33347 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33349 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33350 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33351 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33352 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
33356 /* TM vector builtins. */
33358 /* Reuse the existing x86-specific `struct builtin_description' because
33359 we're lazy. Add casts to make them fit. */
33360 static const struct builtin_description bdesc_tm[] =
33362 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33363 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33364 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33365 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33366 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33367 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33368 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33370 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33371 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33372 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33373 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33374 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33375 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33376 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33378 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33379 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33380 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33381 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33382 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33383 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33384 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33386 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33387 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33388 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
33391 /* TM callbacks. */
33393 /* Return the builtin decl needed to load a vector of TYPE. */
33395 static tree
33396 ix86_builtin_tm_load (tree type)
33398 if (TREE_CODE (type) == VECTOR_TYPE)
33400 switch (tree_to_uhwi (TYPE_SIZE (type)))
33402 case 64:
33403 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33404 case 128:
33405 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33406 case 256:
33407 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33410 return NULL_TREE;
33413 /* Return the builtin decl needed to store a vector of TYPE. */
33415 static tree
33416 ix86_builtin_tm_store (tree type)
33418 if (TREE_CODE (type) == VECTOR_TYPE)
33420 switch (tree_to_uhwi (TYPE_SIZE (type)))
33422 case 64:
33423 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33424 case 128:
33425 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33426 case 256:
33427 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33430 return NULL_TREE;
33433 /* Initialize the transactional memory vector load/store builtins. */
33435 static void
33436 ix86_init_tm_builtins (void)
33438 enum ix86_builtin_func_type ftype;
33439 const struct builtin_description *d;
33440 size_t i;
33441 tree decl;
33442 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33443 tree attrs_log, attrs_type_log;
33445 if (!flag_tm)
33446 return;
33448 /* If there are no builtins defined, we must be compiling in a
33449 language without trans-mem support. */
33450 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33451 return;
33453 /* Use whatever attributes a normal TM load has. */
33454 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33455 attrs_load = DECL_ATTRIBUTES (decl);
33456 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33457 /* Use whatever attributes a normal TM store has. */
33458 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33459 attrs_store = DECL_ATTRIBUTES (decl);
33460 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33461 /* Use whatever attributes a normal TM log has. */
33462 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33463 attrs_log = DECL_ATTRIBUTES (decl);
33464 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33466 for (i = 0, d = bdesc_tm;
33467 i < ARRAY_SIZE (bdesc_tm);
33468 i++, d++)
33470 if ((d->mask & ix86_isa_flags) != 0
33471 || (lang_hooks.builtin_function
33472 == lang_hooks.builtin_function_ext_scope))
33474 tree type, attrs, attrs_type;
33475 enum built_in_function code = (enum built_in_function) d->code;
33477 ftype = (enum ix86_builtin_func_type) d->flag;
33478 type = ix86_get_builtin_func_type (ftype);
33480 if (BUILTIN_TM_LOAD_P (code))
33482 attrs = attrs_load;
33483 attrs_type = attrs_type_load;
33485 else if (BUILTIN_TM_STORE_P (code))
33487 attrs = attrs_store;
33488 attrs_type = attrs_type_store;
33490 else
33492 attrs = attrs_log;
33493 attrs_type = attrs_type_log;
33495 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33496 /* The builtin without the prefix for
33497 calling it directly. */
33498 d->name + strlen ("__builtin_"),
33499 attrs);
33500 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33501 set the TYPE_ATTRIBUTES. */
33502 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33504 set_builtin_decl (code, decl, false);
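/* Illustrative sketch (not part of this file): each bdesc_tm entry above is
   registered under two names -- the "__builtin_" form and, via the
   d->name + strlen ("__builtin_") argument to add_builtin_function, the
   plain libitm ABI name.  So a transactional store of a 128-bit vector,

       __transaction_atomic { *dst = v; }      // dst is an __m128 *

   can be instrumented with a call to _ITM_WM128, which resolves to the
   builtin created from the "__builtin__ITM_WM128" entry.  */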
33509 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
33510 in the current target ISA, to allow the user to compile particular modules
33511 with target-specific options that differ from the command-line
33512 options. */
33513 static void
33514 ix86_init_mmx_sse_builtins (void)
33516 const struct builtin_description * d;
33517 enum ix86_builtin_func_type ftype;
33518 size_t i;
33520 /* Add all special builtins with variable number of operands. */
33521 for (i = 0, d = bdesc_special_args;
33522 i < ARRAY_SIZE (bdesc_special_args);
33523 i++, d++)
33525 if (d->name == 0)
33526 continue;
33528 ftype = (enum ix86_builtin_func_type) d->flag;
33529 def_builtin (d->mask, d->name, ftype, d->code);
33532 /* Add all builtins with variable number of operands. */
33533 for (i = 0, d = bdesc_args;
33534 i < ARRAY_SIZE (bdesc_args);
33535 i++, d++)
33537 if (d->name == 0)
33538 continue;
33540 ftype = (enum ix86_builtin_func_type) d->flag;
33541 def_builtin_const (d->mask, d->name, ftype, d->code);
33544 /* Add all builtins with rounding. */
33545 for (i = 0, d = bdesc_round_args;
33546 i < ARRAY_SIZE (bdesc_round_args);
33547 i++, d++)
33549 if (d->name == 0)
33550 continue;
33552 ftype = (enum ix86_builtin_func_type) d->flag;
33553 def_builtin_const (d->mask, d->name, ftype, d->code);
33556 /* pcmpestr[im] insns. */
33557 for (i = 0, d = bdesc_pcmpestr;
33558 i < ARRAY_SIZE (bdesc_pcmpestr);
33559 i++, d++)
33561 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33562 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33563 else
33564 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33565 def_builtin_const (d->mask, d->name, ftype, d->code);
33568 /* pcmpistr[im] insns. */
33569 for (i = 0, d = bdesc_pcmpistr;
33570 i < ARRAY_SIZE (bdesc_pcmpistr);
33571 i++, d++)
33573 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33574 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33575 else
33576 ftype = INT_FTYPE_V16QI_V16QI_INT;
33577 def_builtin_const (d->mask, d->name, ftype, d->code);
33580 /* comi/ucomi insns. */
33581 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33583 if (d->mask == OPTION_MASK_ISA_SSE2)
33584 ftype = INT_FTYPE_V2DF_V2DF;
33585 else
33586 ftype = INT_FTYPE_V4SF_V4SF;
33587 def_builtin_const (d->mask, d->name, ftype, d->code);
33590 /* SSE */
33591 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33592 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33593 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33594 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33596 /* SSE or 3DNow!A */
33597 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33598 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33599 IX86_BUILTIN_MASKMOVQ);
33601 /* SSE2 */
33602 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33603 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33605 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33606 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33607 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33608 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33610 /* SSE3. */
33611 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33612 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33613 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33614 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
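/* Illustrative usage sketch (not part of this file; MONITOR/MWAIT are
   privileged instructions on most operating systems, so this only
   documents the builtin signatures defined above):

       __builtin_ia32_monitor ((const void *) addr, 0, 0);
       __builtin_ia32_mwait (0, 0);

   i.e. arm address monitoring on ADDR, then wait until the monitored
   line is written or an interrupt arrives -- the pattern behind
   _mm_monitor/_mm_mwait in pmmintrin.h.  */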
33616 /* AES */
33617 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33618 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33619 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33620 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33621 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33622 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33623 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33624 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33625 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33626 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33627 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33628 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33630 /* PCLMUL */
33631 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33632 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
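/* Illustrative user-level sketch (not part of this file), assuming
   <wmmintrin.h> and -mpclmul: _mm_clmulepi64_si128 is the intrinsic
   wrapper around __builtin_ia32_pclmulqdq128; the immediate selects
   which 64-bit half of each operand is multiplied carry-lessly.  */

#include <wmmintrin.h>

__m128i
clmul_low_halves (__m128i a, __m128i b)
{
  /* Multiply the low qword of A by the low qword of B without carries,
     as used in GHASH and CRC computations.  */
  return _mm_clmulepi64_si128 (a, b, 0x00);
}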
33634 /* RDRND */
33635 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33636 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33637 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33638 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33639 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33640 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33641 IX86_BUILTIN_RDRAND64_STEP);
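/* Illustrative user-level sketch (not part of this file), assuming -mrdrnd:
   the *_step builtins store a hardware random value through the pointer
   and return nonzero on success, zero when no entropy was available.  */

unsigned int
rdrand_u32 (void)
{
  unsigned int val;
  /* RDRAND can transiently fail; the usual idiom is to retry.  */
  while (!__builtin_ia32_rdrand32_step (&val))
    ;
  return val;
}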
33643 /* AVX2 */
33644 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33645 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33646 IX86_BUILTIN_GATHERSIV2DF);
33648 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33649 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33650 IX86_BUILTIN_GATHERSIV4DF);
33652 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33653 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33654 IX86_BUILTIN_GATHERDIV2DF);
33656 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33657 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33658 IX86_BUILTIN_GATHERDIV4DF);
33660 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33661 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33662 IX86_BUILTIN_GATHERSIV4SF);
33664 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33665 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33666 IX86_BUILTIN_GATHERSIV8SF);
33668 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33669 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33670 IX86_BUILTIN_GATHERDIV4SF);
33672 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33673 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33674 IX86_BUILTIN_GATHERDIV8SF);
33676 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33677 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33678 IX86_BUILTIN_GATHERSIV2DI);
33680 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33681 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33682 IX86_BUILTIN_GATHERSIV4DI);
33684 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33685 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33686 IX86_BUILTIN_GATHERDIV2DI);
33688 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33689 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33690 IX86_BUILTIN_GATHERDIV4DI);
33692 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33693 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33694 IX86_BUILTIN_GATHERSIV4SI);
33696 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33697 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33698 IX86_BUILTIN_GATHERSIV8SI);
33700 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33701 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33702 IX86_BUILTIN_GATHERDIV4SI);
33704 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33705 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33706 IX86_BUILTIN_GATHERDIV8SI);
33708 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33709 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33710 IX86_BUILTIN_GATHERALTSIV4DF);
33712 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33713 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33714 IX86_BUILTIN_GATHERALTDIV8SF);
33716 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33717 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33718 IX86_BUILTIN_GATHERALTSIV4DI);
33720 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33721 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33722 IX86_BUILTIN_GATHERALTDIV8SI);
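/* Illustrative user-level sketch (not part of this file), assuming
   <immintrin.h> and -mavx2: the gather builtins above back the AVX2
   gather intrinsics, e.g. _mm256_i32gather_pd loads four doubles from
   base[index[i]] with a single vgatherdpd.  */

#include <immintrin.h>

__m256d
gather_four_doubles (const double *base, __m128i index)
{
  /* The scale is in bytes and must be a compile-time constant (1, 2, 4 or 8).  */
  return _mm256_i32gather_pd (base, index, 8);
}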
33724 /* AVX512F */
33725 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33726 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33727 IX86_BUILTIN_GATHER3SIV16SF);
33729 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33730 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33731 IX86_BUILTIN_GATHER3SIV8DF);
33733 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33734 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33735 IX86_BUILTIN_GATHER3DIV16SF);
33737 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33738 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33739 IX86_BUILTIN_GATHER3DIV8DF);
33741 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33742 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33743 IX86_BUILTIN_GATHER3SIV16SI);
33745 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33746 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33747 IX86_BUILTIN_GATHER3SIV8DI);
33749 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33750 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33751 IX86_BUILTIN_GATHER3DIV16SI);
33753 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33754 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33755 IX86_BUILTIN_GATHER3DIV8DI);
33757 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
33758 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33759 IX86_BUILTIN_GATHER3ALTSIV8DF);
33761 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
33762 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33763 IX86_BUILTIN_GATHER3ALTDIV16SF);
33765 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
33766 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33767 IX86_BUILTIN_GATHER3ALTSIV8DI);
33769 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
33770 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33771 IX86_BUILTIN_GATHER3ALTDIV16SI);
33773 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33774 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33775 IX86_BUILTIN_SCATTERSIV16SF);
33777 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33778 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33779 IX86_BUILTIN_SCATTERSIV8DF);
33781 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33782 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33783 IX86_BUILTIN_SCATTERDIV16SF);
33785 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33786 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33787 IX86_BUILTIN_SCATTERDIV8DF);
33789 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33790 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33791 IX86_BUILTIN_SCATTERSIV16SI);
33793 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33794 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33795 IX86_BUILTIN_SCATTERSIV8DI);
33797 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
33798 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
33799 IX86_BUILTIN_SCATTERDIV16SI);
33801 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
33802 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
33803 IX86_BUILTIN_SCATTERDIV8DI);
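/* Illustrative user-level sketch (not part of this file), assuming
   <immintrin.h> and -mavx512f: the scatter builtins above back the
   AVX-512 scatter intrinsics, e.g. _mm512_i32scatter_pd stores eight
   doubles to base[index[i]].  */

#include <immintrin.h>

void
scatter_eight_doubles (double *base, __m256i index, __m512d values)
{
  _mm512_i32scatter_pd (base, index, values, 8);
}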
33805 /* AVX512VL */
33806 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
33807 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
33808 IX86_BUILTIN_GATHER3SIV2DF);
33810 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
33811 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
33812 IX86_BUILTIN_GATHER3SIV4DF);
33814 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
33815 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
33816 IX86_BUILTIN_GATHER3DIV2DF);
33818 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
33819 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
33820 IX86_BUILTIN_GATHER3DIV4DF);
33822 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
33823 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
33824 IX86_BUILTIN_GATHER3SIV4SF);
33826 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
33827 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
33828 IX86_BUILTIN_GATHER3SIV8SF);
33830 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
33831 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
33832 IX86_BUILTIN_GATHER3DIV4SF);
33834 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
33835 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
33836 IX86_BUILTIN_GATHER3DIV8SF);
33838 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
33839 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
33840 IX86_BUILTIN_GATHER3SIV2DI);
33842 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
33843 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
33844 IX86_BUILTIN_GATHER3SIV4DI);
33846 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
33847 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
33848 IX86_BUILTIN_GATHER3DIV2DI);
33850 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
33851 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
33852 IX86_BUILTIN_GATHER3DIV4DI);
33854 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
33855 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
33856 IX86_BUILTIN_GATHER3SIV4SI);
33858 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
33859 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
33860 IX86_BUILTIN_GATHER3SIV8SI);
33862 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
33863 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
33864 IX86_BUILTIN_GATHER3DIV4SI);
33866 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
33867 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
33868 IX86_BUILTIN_GATHER3DIV8SI);
33870 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
33871 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
33872 IX86_BUILTIN_GATHER3ALTSIV4DF);
33874 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
33875 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
33876 IX86_BUILTIN_GATHER3ALTDIV8SF);
33878 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
33879 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
33880 IX86_BUILTIN_GATHER3ALTSIV4DI);
33882 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
33883 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
33884 IX86_BUILTIN_GATHER3ALTDIV8SI);
33886 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
33887 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
33888 IX86_BUILTIN_SCATTERSIV8SF);
33890 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
33891 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
33892 IX86_BUILTIN_SCATTERSIV4SF);
33894 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
33895 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
33896 IX86_BUILTIN_SCATTERSIV4DF);
33898 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
33899 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
33900 IX86_BUILTIN_SCATTERSIV2DF);
33902 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
33903 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
33904 IX86_BUILTIN_SCATTERDIV8SF);
33906 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
33907 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
33908 IX86_BUILTIN_SCATTERDIV4SF);
33910 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
33911 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
33912 IX86_BUILTIN_SCATTERDIV4DF);
33914 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
33915 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
33916 IX86_BUILTIN_SCATTERDIV2DF);
33918 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
33919 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
33920 IX86_BUILTIN_SCATTERSIV8SI);
33922 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
33923 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
33924 IX86_BUILTIN_SCATTERSIV4SI);
33926 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
33927 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
33928 IX86_BUILTIN_SCATTERSIV4DI);
33930 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
33931 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
33932 IX86_BUILTIN_SCATTERSIV2DI);
33934 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
33935 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
33936 IX86_BUILTIN_SCATTERDIV8SI);
33938 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
33939 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
33940 IX86_BUILTIN_SCATTERDIV4SI);
33942 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
33943 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
33944 IX86_BUILTIN_SCATTERDIV4DI);
33946 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
33947 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
33948 IX86_BUILTIN_SCATTERDIV2DI);
33950 /* AVX512PF */
33951 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
33952 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33953 IX86_BUILTIN_GATHERPFDPD);
33954 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
33955 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33956 IX86_BUILTIN_GATHERPFDPS);
33957 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
33958 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33959 IX86_BUILTIN_GATHERPFQPD);
33960 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
33961 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33962 IX86_BUILTIN_GATHERPFQPS);
33963 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
33964 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33965 IX86_BUILTIN_SCATTERPFDPD);
33966 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
33967 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33968 IX86_BUILTIN_SCATTERPFDPS);
33969 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
33970 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33971 IX86_BUILTIN_SCATTERPFQPD);
33972 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
33973 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33974 IX86_BUILTIN_SCATTERPFQPS);
33976 /* SHA */
33977 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
33978 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
33979 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
33980 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
33981 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
33982 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
33983 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
33984 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
33985 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
33986 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
33987 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
33988 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
33989 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
33990 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
33992 /* RTM. */
33993 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
33994 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
33996 /* MMX access to the vec_init patterns. */
33997 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
33998 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
34000 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
34001 V4HI_FTYPE_HI_HI_HI_HI,
34002 IX86_BUILTIN_VEC_INIT_V4HI);
34004 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
34005 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
34006 IX86_BUILTIN_VEC_INIT_V8QI);
34008 /* Access to the vec_extract patterns. */
34009 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
34010 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
34011 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
34012 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
34013 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
34014 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
34015 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
34016 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
34017 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
34018 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
34020 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34021 "__builtin_ia32_vec_ext_v4hi",
34022 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
34024 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
34025 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
34027 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
34028 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
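/* Illustrative user-level sketch (not part of this file), assuming
   <emmintrin.h> and -msse2: the vec_ext builtins above back the
   element-extraction intrinsics, e.g. _mm_extract_epi16 is built on
   __builtin_ia32_vec_ext_v8hi.  */

#include <emmintrin.h>

int
third_halfword (__m128i v)
{
  /* The element index must be a compile-time constant.  */
  return _mm_extract_epi16 (v, 2);
}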
34030 /* Access to the vec_set patterns. */
34031 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
34032 "__builtin_ia32_vec_set_v2di",
34033 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
34035 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
34036 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
34038 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
34039 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
34041 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
34042 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
34044 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34045 "__builtin_ia32_vec_set_v4hi",
34046 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
34048 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
34049 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
34051 /* RDSEED */
34052 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
34053 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
34054 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
34055 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
34056 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
34057 "__builtin_ia32_rdseed_di_step",
34058 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
34060 /* ADCX */
34061 def_builtin (0, "__builtin_ia32_addcarryx_u32",
34062 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34063 def_builtin (OPTION_MASK_ISA_64BIT,
34064 "__builtin_ia32_addcarryx_u64",
34065 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34066 IX86_BUILTIN_ADDCARRYX64);
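/* Illustrative user-level sketch (not part of this file): the add-with-carry
   builtins take a carry-in, two operands and a pointer for the sum, and
   return the carry-out, so they chain naturally into multi-word adds.
   (Note the zero ISA mask above: __builtin_ia32_addcarryx_u32 is available
   even without -madx.)  */

void
add_96bit (const unsigned int a[3], const unsigned int b[3], unsigned int out[3])
{
  unsigned char carry = 0;
  carry = __builtin_ia32_addcarryx_u32 (carry, a[0], b[0], &out[0]);
  carry = __builtin_ia32_addcarryx_u32 (carry, a[1], b[1], &out[1]);
  (void) __builtin_ia32_addcarryx_u32 (carry, a[2], b[2], &out[2]);
}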
34068 /* SBB */
34069 def_builtin (0, "__builtin_ia32_sbb_u32",
34070 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34071 def_builtin (OPTION_MASK_ISA_64BIT,
34072 "__builtin_ia32_sbb_u64",
34073 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34074 IX86_BUILTIN_SBB64);
34076 /* Read/write FLAGS. */
34077 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34078 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34079 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34080 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34081 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34082 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34083 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34084 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
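/* Illustrative user-level sketch (not part of this file): the _u32 variants
   above are registered only for 32-bit targets (note the
   ~OPTION_MASK_ISA_64BIT mask), the _u64 variants only for 64-bit ones.  */

unsigned int
saved_flags_32 (void)
{
  /* Read the EFLAGS register (32-bit mode).  */
  return __builtin_ia32_readeflags_u32 ();
}

void
restore_flags_32 (unsigned int flags)
{
  /* Write it back, e.g. after code that clobbers the arithmetic flags.  */
  __builtin_ia32_writeeflags_u32 (flags);
}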
34086 /* CLFLUSHOPT. */
34087 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34088 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34090 /* CLWB. */
34091 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34092 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34094 /* Add FMA4 multi-argument instructions. */
34095 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34097 if (d->name == 0)
34098 continue;
34100 ftype = (enum ix86_builtin_func_type) d->flag;
34101 def_builtin_const (d->mask, d->name, ftype, d->code);
34105 static void
34106 ix86_init_mpx_builtins ()
34108 const struct builtin_description * d;
34109 enum ix86_builtin_func_type ftype;
34110 tree decl;
34111 size_t i;
34113 for (i = 0, d = bdesc_mpx;
34114 i < ARRAY_SIZE (bdesc_mpx);
34115 i++, d++)
34117 if (d->name == 0)
34118 continue;
34120 ftype = (enum ix86_builtin_func_type) d->flag;
34121 decl = def_builtin (d->mask, d->name, ftype, d->code);
34123 /* Without the leaf and nothrow flags, calls to MPX builtins
34124 may be followed by abnormal edges when setjmp is
34125 present in the function. Since there may be many
34126 MPX builtin calls, this causes lots of useless
34127 edges and enormous PHI nodes. To avoid this we mark
34128 MPX builtins as leaf and nothrow. */
34129 if (decl)
34131 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34132 NULL_TREE);
34133 TREE_NOTHROW (decl) = 1;
34135 else
34137 ix86_builtins_isa[(int)d->code].leaf_p = true;
34138 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34142 for (i = 0, d = bdesc_mpx_const;
34143 i < ARRAY_SIZE (bdesc_mpx_const);
34144 i++, d++)
34146 if (d->name == 0)
34147 continue;
34149 ftype = (enum ix86_builtin_func_type) d->flag;
34150 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34152 if (decl)
34154 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34155 NULL_TREE);
34156 TREE_NOTHROW (decl) = 1;
34158 else
34160 ix86_builtins_isa[(int)d->code].leaf_p = true;
34161 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34166 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34167 to return a pointer to VERSION_DECL if the outcome of the expression
34168 formed by PREDICATE_CHAIN is true. This function will be called during
34169 version dispatch to decide which function version to execute. It returns
34170 the basic block at the end, to which more conditions can be added. */
34172 static basic_block
34173 add_condition_to_bb (tree function_decl, tree version_decl,
34174 tree predicate_chain, basic_block new_bb)
34176 gimple return_stmt;
34177 tree convert_expr, result_var;
34178 gimple convert_stmt;
34179 gimple call_cond_stmt;
34180 gimple if_else_stmt;
34182 basic_block bb1, bb2, bb3;
34183 edge e12, e23;
34185 tree cond_var, and_expr_var = NULL_TREE;
34186 gimple_seq gseq;
34188 tree predicate_decl, predicate_arg;
34190 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34192 gcc_assert (new_bb != NULL);
34193 gseq = bb_seq (new_bb);
34196 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34197 build_fold_addr_expr (version_decl));
34198 result_var = create_tmp_var (ptr_type_node);
34199 convert_stmt = gimple_build_assign (result_var, convert_expr);
34200 return_stmt = gimple_build_return (result_var);
34202 if (predicate_chain == NULL_TREE)
34204 gimple_seq_add_stmt (&gseq, convert_stmt);
34205 gimple_seq_add_stmt (&gseq, return_stmt);
34206 set_bb_seq (new_bb, gseq);
34207 gimple_set_bb (convert_stmt, new_bb);
34208 gimple_set_bb (return_stmt, new_bb);
34209 pop_cfun ();
34210 return new_bb;
34213 while (predicate_chain != NULL)
34215 cond_var = create_tmp_var (integer_type_node);
34216 predicate_decl = TREE_PURPOSE (predicate_chain);
34217 predicate_arg = TREE_VALUE (predicate_chain);
34218 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34219 gimple_call_set_lhs (call_cond_stmt, cond_var);
34221 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34222 gimple_set_bb (call_cond_stmt, new_bb);
34223 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34225 predicate_chain = TREE_CHAIN (predicate_chain);
34227 if (and_expr_var == NULL)
34228 and_expr_var = cond_var;
34229 else
34231 gimple assign_stmt;
34232 /* Use MIN_EXPR to check whether any of the integers is zero:
34233 and_expr_var = min_expr <cond_var, and_expr_var>. */
34234 assign_stmt = gimple_build_assign (and_expr_var,
34235 build2 (MIN_EXPR, integer_type_node,
34236 cond_var, and_expr_var));
34238 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34239 gimple_set_bb (assign_stmt, new_bb);
34240 gimple_seq_add_stmt (&gseq, assign_stmt);
34244 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34245 integer_zero_node,
34246 NULL_TREE, NULL_TREE);
34247 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34248 gimple_set_bb (if_else_stmt, new_bb);
34249 gimple_seq_add_stmt (&gseq, if_else_stmt);
34251 gimple_seq_add_stmt (&gseq, convert_stmt);
34252 gimple_seq_add_stmt (&gseq, return_stmt);
34253 set_bb_seq (new_bb, gseq);
34255 bb1 = new_bb;
34256 e12 = split_block (bb1, if_else_stmt);
34257 bb2 = e12->dest;
34258 e12->flags &= ~EDGE_FALLTHRU;
34259 e12->flags |= EDGE_TRUE_VALUE;
34261 e23 = split_block (bb2, return_stmt);
34263 gimple_set_bb (convert_stmt, bb2);
34264 gimple_set_bb (return_stmt, bb2);
34266 bb3 = e23->dest;
34267 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34269 remove_edge (e23);
34270 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34272 pop_cfun ();
34274 return bb3;
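/* Illustrative sketch (not part of this file): the chain of conditions
   built above forms the body of the resolver used for function
   multiversioning.  For C++ source such as

       __attribute__ ((target ("default"))) int foo () { return 0; }
       __attribute__ ((target ("avx2")))    int foo () { return 2; }

   the generated dispatcher behaves roughly like

       if (__builtin_cpu_supports ("avx2"))
         return &foo_avx2;
       return &foo_default;

   with one such block appended per call to add_condition_to_bb.  */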
34277 /* This parses the attribute arguments to target in DECL and determines
34278 the right builtin to use to match the platform specification.
34279 It returns the priority value for this version decl. If PREDICATE_LIST
34280 is not NULL, it stores the list of cpu features that need to be checked
34281 before dispatching this function. */
34283 static unsigned int
34284 get_builtin_code_for_version (tree decl, tree *predicate_list)
34286 tree attrs;
34287 struct cl_target_option cur_target;
34288 tree target_node;
34289 struct cl_target_option *new_target;
34290 const char *arg_str = NULL;
34291 const char *attrs_str = NULL;
34292 char *tok_str = NULL;
34293 char *token;
34295 /* Priority of i386 features; a greater value means a higher priority. This is
34296 used to decide the order in which function dispatch must happen. For
34297 instance, a version specialized for SSE4.2 should be checked for dispatch
34298 before a version for SSE3, as SSE4.2 implies SSE3. */
34299 enum feature_priority
34301 P_ZERO = 0,
34302 P_MMX,
34303 P_SSE,
34304 P_SSE2,
34305 P_SSE3,
34306 P_SSSE3,
34307 P_PROC_SSSE3,
34308 P_SSE4_A,
34309 P_PROC_SSE4_A,
34310 P_SSE4_1,
34311 P_SSE4_2,
34312 P_PROC_SSE4_2,
34313 P_POPCNT,
34314 P_AVX,
34315 P_PROC_AVX,
34316 P_BMI,
34317 P_PROC_BMI,
34318 P_FMA4,
34319 P_XOP,
34320 P_PROC_XOP,
34321 P_FMA,
34322 P_PROC_FMA,
34323 P_BMI2,
34324 P_AVX2,
34325 P_PROC_AVX2,
34326 P_AVX512F,
34327 P_PROC_AVX512F
34330 enum feature_priority priority = P_ZERO;
34332 /* These are the target attribute strings for which a dispatcher is
34333 available, from fold_builtin_cpu. */
34335 static struct _feature_list
34337 const char *const name;
34338 const enum feature_priority priority;
34340 const feature_list[] =
34342 {"mmx", P_MMX},
34343 {"sse", P_SSE},
34344 {"sse2", P_SSE2},
34345 {"sse3", P_SSE3},
34346 {"sse4a", P_SSE4_A},
34347 {"ssse3", P_SSSE3},
34348 {"sse4.1", P_SSE4_1},
34349 {"sse4.2", P_SSE4_2},
34350 {"popcnt", P_POPCNT},
34351 {"avx", P_AVX},
34352 {"bmi", P_BMI},
34353 {"fma4", P_FMA4},
34354 {"xop", P_XOP},
34355 {"fma", P_FMA},
34356 {"bmi2", P_BMI2},
34357 {"avx2", P_AVX2},
34358 {"avx512f", P_AVX512F}
34362 static unsigned int NUM_FEATURES
34363 = sizeof (feature_list) / sizeof (struct _feature_list);
34365 unsigned int i;
34367 tree predicate_chain = NULL_TREE;
34368 tree predicate_decl, predicate_arg;
34370 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34371 gcc_assert (attrs != NULL);
34373 attrs = TREE_VALUE (TREE_VALUE (attrs));
34375 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34376 attrs_str = TREE_STRING_POINTER (attrs);
34378 /* Return priority zero for default function. */
34379 if (strcmp (attrs_str, "default") == 0)
34380 return 0;
34382 /* Handle arch= if specified. For priority, set it to be 1 more than
34383 the best instruction set the processor can handle. For instance, if
34384 there is a version for atom and a version for ssse3 (the highest ISA
34385 priority for atom), the atom version must be checked for dispatch
34386 before the ssse3 version. */
34387 if (strstr (attrs_str, "arch=") != NULL)
34389 cl_target_option_save (&cur_target, &global_options);
34390 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34391 &global_options_set);
34393 gcc_assert (target_node);
34394 new_target = TREE_TARGET_OPTION (target_node);
34395 gcc_assert (new_target);
34397 if (new_target->arch_specified && new_target->arch > 0)
34399 switch (new_target->arch)
34401 case PROCESSOR_CORE2:
34402 arg_str = "core2";
34403 priority = P_PROC_SSSE3;
34404 break;
34405 case PROCESSOR_NEHALEM:
34406 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34407 arg_str = "westmere";
34408 else
34409 /* We translate "arch=corei7" and "arch=nehalem" to
34410 "corei7" so that it will be mapped to M_INTEL_COREI7
34411 as cpu type to cover all M_INTEL_COREI7_XXXs. */
34412 arg_str = "corei7";
34413 priority = P_PROC_SSE4_2;
34414 break;
34415 case PROCESSOR_SANDYBRIDGE:
34416 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34417 arg_str = "ivybridge";
34418 else
34419 arg_str = "sandybridge";
34420 priority = P_PROC_AVX;
34421 break;
34422 case PROCESSOR_HASWELL:
34423 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34424 arg_str = "broadwell";
34425 else
34426 arg_str = "haswell";
34427 priority = P_PROC_AVX2;
34428 break;
34429 case PROCESSOR_BONNELL:
34430 arg_str = "bonnell";
34431 priority = P_PROC_SSSE3;
34432 break;
34433 case PROCESSOR_KNL:
34434 arg_str = "knl";
34435 priority = P_PROC_AVX512F;
34436 break;
34437 case PROCESSOR_SILVERMONT:
34438 arg_str = "silvermont";
34439 priority = P_PROC_SSE4_2;
34440 break;
34441 case PROCESSOR_AMDFAM10:
34442 arg_str = "amdfam10h";
34443 priority = P_PROC_SSE4_A;
34444 break;
34445 case PROCESSOR_BTVER1:
34446 arg_str = "btver1";
34447 priority = P_PROC_SSE4_A;
34448 break;
34449 case PROCESSOR_BTVER2:
34450 arg_str = "btver2";
34451 priority = P_PROC_BMI;
34452 break;
34453 case PROCESSOR_BDVER1:
34454 arg_str = "bdver1";
34455 priority = P_PROC_XOP;
34456 break;
34457 case PROCESSOR_BDVER2:
34458 arg_str = "bdver2";
34459 priority = P_PROC_FMA;
34460 break;
34461 case PROCESSOR_BDVER3:
34462 arg_str = "bdver3";
34463 priority = P_PROC_FMA;
34464 break;
34465 case PROCESSOR_BDVER4:
34466 arg_str = "bdver4";
34467 priority = P_PROC_AVX2;
34468 break;
34472 cl_target_option_restore (&global_options, &cur_target);
34474 if (predicate_list && arg_str == NULL)
34476 error_at (DECL_SOURCE_LOCATION (decl),
34477 "No dispatcher found for the versioning attributes");
34478 return 0;
34481 if (predicate_list)
34483 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34484 /* For a C string literal the length includes the terminating NUL. */
34485 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34486 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34487 predicate_chain);
34491 /* Process feature name. */
34492 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34493 strcpy (tok_str, attrs_str);
34494 token = strtok (tok_str, ",");
34495 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34497 while (token != NULL)
34499 /* Do not process "arch=" */
34500 if (strncmp (token, "arch=", 5) == 0)
34502 token = strtok (NULL, ",");
34503 continue;
34505 for (i = 0; i < NUM_FEATURES; ++i)
34507 if (strcmp (token, feature_list[i].name) == 0)
34509 if (predicate_list)
34511 predicate_arg = build_string_literal (
34512 strlen (feature_list[i].name) + 1,
34513 feature_list[i].name);
34514 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34515 predicate_chain);
34517 /* Find the maximum priority feature. */
34518 if (feature_list[i].priority > priority)
34519 priority = feature_list[i].priority;
34521 break;
34524 if (predicate_list && i == NUM_FEATURES)
34526 error_at (DECL_SOURCE_LOCATION (decl),
34527 "No dispatcher found for %s", token);
34528 return 0;
34530 token = strtok (NULL, ",");
34532 free (tok_str);
34534 if (predicate_list && predicate_chain == NULL_TREE)
34536 error_at (DECL_SOURCE_LOCATION (decl),
34537 "No dispatcher found for the versioning attributes : %s",
34538 attrs_str);
34539 return 0;
34541 else if (predicate_list)
34543 predicate_chain = nreverse (predicate_chain);
34544 *predicate_list = predicate_chain;
34547 return priority;
34550 /* This compares the priority of target features in function DECL1
34551 and DECL2. It returns positive value if DECL1 is higher priority,
34552 negative value if DECL2 is higher priority and 0 if they are the
34553 same. */
34555 static int
34556 ix86_compare_version_priority (tree decl1, tree decl2)
34558 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34559 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34561 return (int)priority1 - (int)priority2;
34564 /* V1 and V2 point to function versions with different priorities
34565 based on the target ISA. This function compares their priorities. */
34567 static int
34568 feature_compare (const void *v1, const void *v2)
34570 typedef struct _function_version_info
34572 tree version_decl;
34573 tree predicate_chain;
34574 unsigned int dispatch_priority;
34575 } function_version_info;
34577 const function_version_info c1 = *(const function_version_info *)v1;
34578 const function_version_info c2 = *(const function_version_info *)v2;
34579 return (c2.dispatch_priority - c1.dispatch_priority);
34582 /* This function generates the dispatch function for
34583 multi-versioned functions. DISPATCH_DECL is the function which will
34584 contain the dispatch logic. FNDECLS are the function choices for
34585 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34586 in DISPATCH_DECL in which the dispatch code is generated. */
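/* Roughly, the body built here (together with add_condition_to_bb) is:

     __builtin_cpu_init ();
     if (<predicate for highest-priority version>) return <that version>;
     if (<predicate for next version>) return <that version>;
     ...
     return <default version>;

   with the non-default versions tested in decreasing order of dispatch
   priority.  (Illustrative sketch only.)  */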
34588 static int
34589 dispatch_function_versions (tree dispatch_decl,
34590 void *fndecls_p,
34591 basic_block *empty_bb)
34593 tree default_decl;
34594 gimple ifunc_cpu_init_stmt;
34595 gimple_seq gseq;
34596 int ix;
34597 tree ele;
34598 vec<tree> *fndecls;
34599 unsigned int num_versions = 0;
34600 unsigned int actual_versions = 0;
34601 unsigned int i;
34603 struct _function_version_info
34605 tree version_decl;
34606 tree predicate_chain;
34607 unsigned int dispatch_priority;
34608 }*function_version_info;
34610 gcc_assert (dispatch_decl != NULL
34611 && fndecls_p != NULL
34612 && empty_bb != NULL);
34614 /* fndecls_p is actually a vector.  */
34615 fndecls = static_cast<vec<tree> *> (fndecls_p);
34617 /* At least one more version other than the default. */
34618 num_versions = fndecls->length ();
34619 gcc_assert (num_versions >= 2);
34621 function_version_info = (struct _function_version_info *)
34622 XNEWVEC (struct _function_version_info, (num_versions - 1));
34624 /* The first version in the vector is the default decl. */
34625 default_decl = (*fndecls)[0];
34627 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34629 gseq = bb_seq (*empty_bb);
34630 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34631 constructors, so explicitly call __builtin_cpu_init here. */
34632 ifunc_cpu_init_stmt = gimple_build_call_vec (
34633 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34634 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34635 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34636 set_bb_seq (*empty_bb, gseq);
34638 pop_cfun ();
34641 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34643 tree version_decl = ele;
34644 tree predicate_chain = NULL_TREE;
34645 unsigned int priority;
34646 /* Get attribute string, parse it and find the right predicate decl.
34647 The predicate function could be a lengthy combination of many
34648 features, like arch-type and various isa-variants. */
34649 priority = get_builtin_code_for_version (version_decl,
34650 &predicate_chain);
34652 if (predicate_chain == NULL_TREE)
34653 continue;
34655 function_version_info [actual_versions].version_decl = version_decl;
34656 function_version_info [actual_versions].predicate_chain
34657 = predicate_chain;
34658 function_version_info [actual_versions].dispatch_priority = priority;
34659 actual_versions++;
34662 /* Sort the versions according to descending order of dispatch priority. The
34663 priority is based on the ISA. This is not a perfect solution. There
34664 could still be ambiguity. If more than one function version is suitable
34665 to execute, which one should be dispatched? In the future, allow the user
34666 to specify a dispatch priority next to the version. */
34667 qsort (function_version_info, actual_versions,
34668 sizeof (struct _function_version_info), feature_compare);
34670 for (i = 0; i < actual_versions; ++i)
34671 *empty_bb = add_condition_to_bb (dispatch_decl,
34672 function_version_info[i].version_decl,
34673 function_version_info[i].predicate_chain,
34674 *empty_bb);
34676 /* dispatch default version at the end. */
34677 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34678 NULL, *empty_bb);
34680 free (function_version_info);
34681 return 0;
34684 /* Comparator function to be used in qsort routine to sort attribute
34685 specification strings to "target". */
34687 static int
34688 attr_strcmp (const void *v1, const void *v2)
34690 const char *c1 = *(char *const*)v1;
34691 const char *c2 = *(char *const*)v2;
34692 return strcmp (c1, c2);
34695 /* ARGLIST is the argument to target attribute. This function tokenizes
34696 the comma separated arguments, sorts them and returns a string which
34697 is a unique identifier for the comma separated arguments. It also
34698 replaces non-identifier characters "=,-" with "_". */
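/* For instance, the argument list from target ("arch=atom,sse3") is first
   flattened to "arch_atom,sse3" (after the '='/'-' replacement) and, since
   it holds more than one token, is then sorted and rejoined with '_' to
   give "arch_atom_sse3".  (Worked example derived from the code below.)  */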
34700 static char *
34701 sorted_attr_string (tree arglist)
34703 tree arg;
34704 size_t str_len_sum = 0;
34705 char **args = NULL;
34706 char *attr_str, *ret_str;
34707 char *attr = NULL;
34708 unsigned int argnum = 1;
34709 unsigned int i;
34711 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34713 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34714 size_t len = strlen (str);
34715 str_len_sum += len + 1;
34716 if (arg != arglist)
34717 argnum++;
34718 for (i = 0; i < strlen (str); i++)
34719 if (str[i] == ',')
34720 argnum++;
34723 attr_str = XNEWVEC (char, str_len_sum);
34724 str_len_sum = 0;
34725 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34727 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34728 size_t len = strlen (str);
34729 memcpy (attr_str + str_len_sum, str, len);
34730 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34731 str_len_sum += len + 1;
34734 /* Replace "=,-" with "_". */
34735 for (i = 0; i < strlen (attr_str); i++)
34736 if (attr_str[i] == '=' || attr_str[i]== '-')
34737 attr_str[i] = '_';
34739 if (argnum == 1)
34740 return attr_str;
34742 args = XNEWVEC (char *, argnum);
34744 i = 0;
34745 attr = strtok (attr_str, ",");
34746 while (attr != NULL)
34748 args[i] = attr;
34749 i++;
34750 attr = strtok (NULL, ",");
34753 qsort (args, argnum, sizeof (char *), attr_strcmp);
34755 ret_str = XNEWVEC (char, str_len_sum);
34756 str_len_sum = 0;
34757 for (i = 0; i < argnum; i++)
34759 size_t len = strlen (args[i]);
34760 memcpy (ret_str + str_len_sum, args[i], len);
34761 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34762 str_len_sum += len + 1;
34765 XDELETEVEC (args);
34766 XDELETEVEC (attr_str);
34767 return ret_str;
34770 /* This function changes the assembler name for functions that are
34771 versions. If DECL is a function version and has a "target"
34772 attribute, it appends the attribute string to its assembler name. */
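/* For example, a version of foo declared with
   __attribute__ ((target ("sse4.2"))) ends up with the assembler name
   "foo.sse4.2", while the "default" version keeps its original name.
   (Example only; the exact suffix is the sorted attribute string.)  */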
34774 static tree
34775 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34777 tree version_attr;
34778 const char *orig_name, *version_string;
34779 char *attr_str, *assembler_name;
34781 if (DECL_DECLARED_INLINE_P (decl)
34782 && lookup_attribute ("gnu_inline",
34783 DECL_ATTRIBUTES (decl)))
34784 error_at (DECL_SOURCE_LOCATION (decl),
34785 "Function versions cannot be marked as gnu_inline,"
34786 " bodies have to be generated");
34788 if (DECL_VIRTUAL_P (decl)
34789 || DECL_VINDEX (decl))
34790 sorry ("Virtual function multiversioning not supported");
34792 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34794 /* target attribute string cannot be NULL. */
34795 gcc_assert (version_attr != NULL_TREE);
34797 orig_name = IDENTIFIER_POINTER (id);
34798 version_string
34799 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
34801 if (strcmp (version_string, "default") == 0)
34802 return id;
34804 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
34805 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
34807 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
34809 /* Allow assembler name to be modified if already set. */
34810 if (DECL_ASSEMBLER_NAME_SET_P (decl))
34811 SET_DECL_RTL (decl, NULL);
34813 tree ret = get_identifier (assembler_name);
34814 XDELETEVEC (attr_str);
34815 XDELETEVEC (assembler_name);
34816 return ret;
34819 /* This function returns true if FN1 and FN2 are versions of the same function,
34820 that is, the target strings of the function decls are different. This assumes
34821 that FN1 and FN2 have the same signature. */
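/* For example, two declarations of the same function, one carrying
   target ("default") and one carrying target ("avx2"), are treated as
   versions; two declarations whose sorted target strings are identical
   are not.  */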
34823 static bool
34824 ix86_function_versions (tree fn1, tree fn2)
34826 tree attr1, attr2;
34827 char *target1, *target2;
34828 bool result;
34830 if (TREE_CODE (fn1) != FUNCTION_DECL
34831 || TREE_CODE (fn2) != FUNCTION_DECL)
34832 return false;
34834 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
34835 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
34837 /* At least one function decl should have the target attribute specified. */
34838 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
34839 return false;
34841 /* Diagnose missing target attribute if one of the decls is already
34842 multi-versioned. */
34843 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
34845 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
34847 if (attr2 != NULL_TREE)
34849 tree tem = fn1;
34850 fn1 = fn2;
34851 fn2 = tem;
34852 attr1 = attr2;
34854 error_at (DECL_SOURCE_LOCATION (fn2),
34855 "missing %<target%> attribute for multi-versioned %D",
34856 fn2);
34857 inform (DECL_SOURCE_LOCATION (fn1),
34858 "previous declaration of %D", fn1);
34859 /* Prevent diagnosing of the same error multiple times. */
34860 DECL_ATTRIBUTES (fn2)
34861 = tree_cons (get_identifier ("target"),
34862 copy_node (TREE_VALUE (attr1)),
34863 DECL_ATTRIBUTES (fn2));
34865 return false;
34868 target1 = sorted_attr_string (TREE_VALUE (attr1));
34869 target2 = sorted_attr_string (TREE_VALUE (attr2));
34871 /* The sorted target strings must be different for fn1 and fn2
34872 to be versions. */
34873 if (strcmp (target1, target2) == 0)
34874 result = false;
34875 else
34876 result = true;
34878 XDELETEVEC (target1);
34879 XDELETEVEC (target2);
34881 return result;
34884 static tree
34885 ix86_mangle_decl_assembler_name (tree decl, tree id)
34887 /* For function version, add the target suffix to the assembler name. */
34888 if (TREE_CODE (decl) == FUNCTION_DECL
34889 && DECL_FUNCTION_VERSIONED (decl))
34890 id = ix86_mangle_function_version_assembler_name (decl, id);
34891 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
34892 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
34893 #endif
34895 return id;
34898 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
34899 is true, append the full path name of the source file. */
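/* E.g. make_name (foo, "resolver", false) yields "foo.resolver"; with
   MAKE_UNIQUE it yields "foo.<unique file-based name>.resolver".
   (Example names only.)  */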
34901 static char *
34902 make_name (tree decl, const char *suffix, bool make_unique)
34904 char *global_var_name;
34905 int name_len;
34906 const char *name;
34907 const char *unique_name = NULL;
34909 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
34911 /* Get a unique name that can be used globally without any chance
34912 of collision at link time. */
34913 if (make_unique)
34914 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
34916 name_len = strlen (name) + strlen (suffix) + 2;
34918 if (make_unique)
34919 name_len += strlen (unique_name) + 1;
34920 global_var_name = XNEWVEC (char, name_len);
34922 /* Use '.' to concatenate names as it is demangler friendly. */
34923 if (make_unique)
34924 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
34925 suffix);
34926 else
34927 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
34929 return global_var_name;
34932 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34934 /* Make a dispatcher declaration for the multi-versioned function DECL.
34935 Calls to DECL function will be replaced with calls to the dispatcher
34936 by the front-end. Return the decl created. */
34938 static tree
34939 make_dispatcher_decl (const tree decl)
34941 tree func_decl;
34942 char *func_name;
34943 tree fn_type, func_type;
34944 bool is_uniq = false;
34946 if (TREE_PUBLIC (decl) == 0)
34947 is_uniq = true;
34949 func_name = make_name (decl, "ifunc", is_uniq);
34951 fn_type = TREE_TYPE (decl);
34952 func_type = build_function_type (TREE_TYPE (fn_type),
34953 TYPE_ARG_TYPES (fn_type));
34955 func_decl = build_fn_decl (func_name, func_type);
34956 XDELETEVEC (func_name);
34957 TREE_USED (func_decl) = 1;
34958 DECL_CONTEXT (func_decl) = NULL_TREE;
34959 DECL_INITIAL (func_decl) = error_mark_node;
34960 DECL_ARTIFICIAL (func_decl) = 1;
34961 /* Mark this func as external; the resolver will flip it again if
34962 it gets generated. */
34963 DECL_EXTERNAL (func_decl) = 1;
34964 /* IFUNCs have to be externally visible.  */
34965 TREE_PUBLIC (func_decl) = 1;
34967 return func_decl;
34970 #endif
34972 /* Returns true if DECL is multi-versioned and is the default function,
34973 that is, it is not tagged with a target-specific optimization. */
34975 static bool
34976 is_function_default_version (const tree decl)
34978 if (TREE_CODE (decl) != FUNCTION_DECL
34979 || !DECL_FUNCTION_VERSIONED (decl))
34980 return false;
34981 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34982 gcc_assert (attr);
34983 attr = TREE_VALUE (TREE_VALUE (attr));
34984 return (TREE_CODE (attr) == STRING_CST
34985 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
34988 /* Make a dispatcher declaration for the multi-versioned function DECL.
34989 Calls to DECL function will be replaced with calls to the dispatcher
34990 by the front-end. Returns the decl of the dispatcher function. */
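/* The dispatching itself is implemented with an IFUNC: when the target
   supports IFUNCs, the dispatcher decl created below is resolved at load
   time by a resolver that picks one of the versions; see
   make_resolver_func and dispatch_function_versions.  */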
34992 static tree
34993 ix86_get_function_versions_dispatcher (void *decl)
34995 tree fn = (tree) decl;
34996 struct cgraph_node *node = NULL;
34997 struct cgraph_node *default_node = NULL;
34998 struct cgraph_function_version_info *node_v = NULL;
34999 struct cgraph_function_version_info *first_v = NULL;
35001 tree dispatch_decl = NULL;
35003 struct cgraph_function_version_info *default_version_info = NULL;
35005 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
35007 node = cgraph_node::get (fn);
35008 gcc_assert (node != NULL);
35010 node_v = node->function_version ();
35011 gcc_assert (node_v != NULL);
35013 if (node_v->dispatcher_resolver != NULL)
35014 return node_v->dispatcher_resolver;
35016 /* Find the default version and make it the first node. */
35017 first_v = node_v;
35018 /* Go to the beginning of the chain. */
35019 while (first_v->prev != NULL)
35020 first_v = first_v->prev;
35021 default_version_info = first_v;
35022 while (default_version_info != NULL)
35024 if (is_function_default_version
35025 (default_version_info->this_node->decl))
35026 break;
35027 default_version_info = default_version_info->next;
35030 /* If there is no default node, just return NULL. */
35031 if (default_version_info == NULL)
35032 return NULL;
35034 /* Make default info the first node. */
35035 if (first_v != default_version_info)
35037 default_version_info->prev->next = default_version_info->next;
35038 if (default_version_info->next)
35039 default_version_info->next->prev = default_version_info->prev;
35040 first_v->prev = default_version_info;
35041 default_version_info->next = first_v;
35042 default_version_info->prev = NULL;
35045 default_node = default_version_info->this_node;
35047 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35048 if (targetm.has_ifunc_p ())
35050 struct cgraph_function_version_info *it_v = NULL;
35051 struct cgraph_node *dispatcher_node = NULL;
35052 struct cgraph_function_version_info *dispatcher_version_info = NULL;
35054 /* Right now, the dispatching is done via ifunc. */
35055 dispatch_decl = make_dispatcher_decl (default_node->decl);
35057 dispatcher_node = cgraph_node::get_create (dispatch_decl);
35058 gcc_assert (dispatcher_node != NULL);
35059 dispatcher_node->dispatcher_function = 1;
35060 dispatcher_version_info
35061 = dispatcher_node->insert_new_function_version ();
35062 dispatcher_version_info->next = default_version_info;
35063 dispatcher_node->definition = 1;
35065 /* Set the dispatcher for all the versions. */
35066 it_v = default_version_info;
35067 while (it_v != NULL)
35069 it_v->dispatcher_resolver = dispatch_decl;
35070 it_v = it_v->next;
35073 else
35074 #endif
35076 error_at (DECL_SOURCE_LOCATION (default_node->decl),
35077 "multiversioning needs ifunc which is not supported "
35078 "on this target");
35081 return dispatch_decl;
35084 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
35085 it to CHAIN. */
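/* E.g. make_attribute ("ifunc", "foo.resolver", chain) produces the
   attribute ifunc ("foo.resolver") chained onto CHAIN, as used below to
   tie the dispatcher to its resolver.  (Hypothetical names.)  */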
35087 static tree
35088 make_attribute (const char *name, const char *arg_name, tree chain)
35090 tree attr_name;
35091 tree attr_arg_name;
35092 tree attr_args;
35093 tree attr;
35095 attr_name = get_identifier (name);
35096 attr_arg_name = build_string (strlen (arg_name), arg_name);
35097 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35098 attr = tree_cons (attr_name, attr_args, chain);
35099 return attr;
35102 /* Make the resolver function decl to dispatch the versions of
35103 a multi-versioned function, DEFAULT_DECL. Create an
35104 empty basic block in the resolver and store the pointer in
35105 EMPTY_BB. Return the decl of the resolver function. */
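/* The resolver created here is named "<default>.resolver" (made unique for
   non-public defaults), is forced externally visible (and comdat when the
   default is public or comdat), and is attached to DISPATCH_DECL through an
   "ifunc" attribute so the dynamic loader invokes it.  (Summary of the code
   below.)  */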
35107 static tree
35108 make_resolver_func (const tree default_decl,
35109 const tree dispatch_decl,
35110 basic_block *empty_bb)
35112 char *resolver_name;
35113 tree decl, type, decl_name, t;
35114 bool is_uniq = false;
35116 /* IFUNC's have to be globally visible. So, if the default_decl is
35117 not, then the name of the IFUNC should be made unique. */
35118 if (TREE_PUBLIC (default_decl) == 0)
35119 is_uniq = true;
35121 /* Append the filename to the resolver function if the versions are
35122 not externally visible. This is because the resolver function has
35123 to be externally visible for the loader to find it. So, appending
35124 the filename will prevent conflicts with a resolver function from
35125 another module which is based on the same version name. */
35126 resolver_name = make_name (default_decl, "resolver", is_uniq);
35128 /* The resolver function should return a (void *). */
35129 type = build_function_type_list (ptr_type_node, NULL_TREE);
35131 decl = build_fn_decl (resolver_name, type);
35132 decl_name = get_identifier (resolver_name);
35133 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35135 DECL_NAME (decl) = decl_name;
35136 TREE_USED (decl) = 1;
35137 DECL_ARTIFICIAL (decl) = 1;
35138 DECL_IGNORED_P (decl) = 0;
35139 /* IFUNC resolvers have to be externally visible. */
35140 TREE_PUBLIC (decl) = 1;
35141 DECL_UNINLINABLE (decl) = 1;
35143 /* Resolver is not external, body is generated. */
35144 DECL_EXTERNAL (decl) = 0;
35145 DECL_EXTERNAL (dispatch_decl) = 0;
35147 DECL_CONTEXT (decl) = NULL_TREE;
35148 DECL_INITIAL (decl) = make_node (BLOCK);
35149 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35151 if (DECL_COMDAT_GROUP (default_decl)
35152 || TREE_PUBLIC (default_decl))
35154 /* In this case, each translation unit with a call to this
35155 versioned function will put out a resolver. Ensure it
35156 is comdat to keep just one copy. */
35157 DECL_COMDAT (decl) = 1;
35158 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35160 /* Build result decl and add to function_decl. */
35161 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35162 DECL_ARTIFICIAL (t) = 1;
35163 DECL_IGNORED_P (t) = 1;
35164 DECL_RESULT (decl) = t;
35166 gimplify_function_tree (decl);
35167 push_cfun (DECL_STRUCT_FUNCTION (decl));
35168 *empty_bb = init_lowered_empty_function (decl, false, 0);
35170 cgraph_node::add_new_function (decl, true);
35171 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35173 pop_cfun ();
35175 gcc_assert (dispatch_decl != NULL);
35176 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35177 DECL_ATTRIBUTES (dispatch_decl)
35178 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35180 /* Create the alias for dispatch to resolver here. */
35181 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35182 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35183 XDELETEVEC (resolver_name);
35184 return decl;
35187 /* Generate the dispatching code body to dispatch multi-versioned function
35188 DECL. The target hook is called to process the "target" attributes and
35189 provide the code to dispatch the right function at run-time. NODE points
35190 to the dispatcher decl whose body will be created. */
35192 static tree
35193 ix86_generate_version_dispatcher_body (void *node_p)
35195 tree resolver_decl;
35196 basic_block empty_bb;
35197 tree default_ver_decl;
35198 struct cgraph_node *versn;
35199 struct cgraph_node *node;
35201 struct cgraph_function_version_info *node_version_info = NULL;
35202 struct cgraph_function_version_info *versn_info = NULL;
35204 node = (cgraph_node *)node_p;
35206 node_version_info = node->function_version ();
35207 gcc_assert (node->dispatcher_function
35208 && node_version_info != NULL);
35210 if (node_version_info->dispatcher_resolver)
35211 return node_version_info->dispatcher_resolver;
35213 /* The first version in the chain corresponds to the default version. */
35214 default_ver_decl = node_version_info->next->this_node->decl;
35216 /* node is going to be an alias, so remove the finalized bit. */
35217 node->definition = false;
35219 resolver_decl = make_resolver_func (default_ver_decl,
35220 node->decl, &empty_bb);
35222 node_version_info->dispatcher_resolver = resolver_decl;
35224 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
35226 auto_vec<tree, 2> fn_ver_vec;
35228 for (versn_info = node_version_info->next; versn_info;
35229 versn_info = versn_info->next)
35231 versn = versn_info->this_node;
35232 /* Check for virtual functions here again, as by this time it should
35233 have been determined if this function needs a vtable index or
35234 not. This happens for methods in derived classes that override
35235 virtual methods in base classes but are not explicitly marked as
35236 virtual. */
35237 if (DECL_VINDEX (versn->decl))
35238 sorry ("Virtual function multiversioning not supported");
35240 fn_ver_vec.safe_push (versn->decl);
35243 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35244 cgraph_edge::rebuild_edges ();
35245 pop_cfun ();
35246 return resolver_decl;
35248 /* This builds the processor_model struct type defined in
35249 libgcc/config/i386/cpuinfo.c */
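/* A minimal sketch of the struct being mirrored (see cpuinfo.c):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */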
35251 static tree
35252 build_processor_model_struct (void)
35254 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35255 "__cpu_features"};
35256 tree field = NULL_TREE, field_chain = NULL_TREE;
35257 int i;
35258 tree type = make_node (RECORD_TYPE);
35260 /* The first 3 fields are unsigned int. */
35261 for (i = 0; i < 3; ++i)
35263 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35264 get_identifier (field_name[i]), unsigned_type_node);
35265 if (field_chain != NULL_TREE)
35266 DECL_CHAIN (field) = field_chain;
35267 field_chain = field;
35270 /* The last field is an array of unsigned integers of size one. */
35271 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35272 get_identifier (field_name[3]),
35273 build_array_type (unsigned_type_node,
35274 build_index_type (size_one_node)));
35275 if (field_chain != NULL_TREE)
35276 DECL_CHAIN (field) = field_chain;
35277 field_chain = field;
35279 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35280 return type;
35283 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
35285 static tree
35286 make_var_decl (tree type, const char *name)
35288 tree new_decl;
35290 new_decl = build_decl (UNKNOWN_LOCATION,
35291 VAR_DECL,
35292 get_identifier(name),
35293 type);
35295 DECL_EXTERNAL (new_decl) = 1;
35296 TREE_STATIC (new_decl) = 1;
35297 TREE_PUBLIC (new_decl) = 1;
35298 DECL_INITIAL (new_decl) = 0;
35299 DECL_ARTIFICIAL (new_decl) = 0;
35300 DECL_PRESERVE_P (new_decl) = 1;
35302 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35303 assemble_variable (new_decl, 0, 0, 0);
35305 return new_decl;
35308 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35309 into an integer defined in libgcc/config/i386/cpuinfo.c */
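/* For example, __builtin_cpu_is ("amd") folds to roughly
   (__cpu_model.__cpu_vendor == M_AMD), and __builtin_cpu_supports ("avx2")
   folds to (__cpu_model.__cpu_features[0] & (1 << F_AVX2)), each converted
   to int.  (Illustrative; the exact trees are built below.)  */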
35311 static tree
35312 fold_builtin_cpu (tree fndecl, tree *args)
35314 unsigned int i;
35315 enum ix86_builtins fn_code = (enum ix86_builtins)
35316 DECL_FUNCTION_CODE (fndecl);
35317 tree param_string_cst = NULL;
35319 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35320 enum processor_features
35322 F_CMOV = 0,
35323 F_MMX,
35324 F_POPCNT,
35325 F_SSE,
35326 F_SSE2,
35327 F_SSE3,
35328 F_SSSE3,
35329 F_SSE4_1,
35330 F_SSE4_2,
35331 F_AVX,
35332 F_AVX2,
35333 F_SSE4_A,
35334 F_FMA4,
35335 F_XOP,
35336 F_FMA,
35337 F_AVX512F,
35338 F_BMI,
35339 F_BMI2,
35340 F_MAX
35343 /* These are the values for vendor types and cpu types and subtypes
35344 in cpuinfo.c. Cpu types and subtypes should be subtracted by
35345 the corresponding start value. */
35346 enum processor_model
35348 M_INTEL = 1,
35349 M_AMD,
35350 M_CPU_TYPE_START,
35351 M_INTEL_BONNELL,
35352 M_INTEL_CORE2,
35353 M_INTEL_COREI7,
35354 M_AMDFAM10H,
35355 M_AMDFAM15H,
35356 M_INTEL_SILVERMONT,
35357 M_INTEL_KNL,
35358 M_AMD_BTVER1,
35359 M_AMD_BTVER2,
35360 M_CPU_SUBTYPE_START,
35361 M_INTEL_COREI7_NEHALEM,
35362 M_INTEL_COREI7_WESTMERE,
35363 M_INTEL_COREI7_SANDYBRIDGE,
35364 M_AMDFAM10H_BARCELONA,
35365 M_AMDFAM10H_SHANGHAI,
35366 M_AMDFAM10H_ISTANBUL,
35367 M_AMDFAM15H_BDVER1,
35368 M_AMDFAM15H_BDVER2,
35369 M_AMDFAM15H_BDVER3,
35370 M_AMDFAM15H_BDVER4,
35371 M_INTEL_COREI7_IVYBRIDGE,
35372 M_INTEL_COREI7_HASWELL,
35373 M_INTEL_COREI7_BROADWELL
35376 static struct _arch_names_table
35378 const char *const name;
35379 const enum processor_model model;
35381 const arch_names_table[] =
35383 {"amd", M_AMD},
35384 {"intel", M_INTEL},
35385 {"atom", M_INTEL_BONNELL},
35386 {"slm", M_INTEL_SILVERMONT},
35387 {"core2", M_INTEL_CORE2},
35388 {"corei7", M_INTEL_COREI7},
35389 {"nehalem", M_INTEL_COREI7_NEHALEM},
35390 {"westmere", M_INTEL_COREI7_WESTMERE},
35391 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35392 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35393 {"haswell", M_INTEL_COREI7_HASWELL},
35394 {"broadwell", M_INTEL_COREI7_BROADWELL},
35395 {"bonnell", M_INTEL_BONNELL},
35396 {"silvermont", M_INTEL_SILVERMONT},
35397 {"knl", M_INTEL_KNL},
35398 {"amdfam10h", M_AMDFAM10H},
35399 {"barcelona", M_AMDFAM10H_BARCELONA},
35400 {"shanghai", M_AMDFAM10H_SHANGHAI},
35401 {"istanbul", M_AMDFAM10H_ISTANBUL},
35402 {"btver1", M_AMD_BTVER1},
35403 {"amdfam15h", M_AMDFAM15H},
35404 {"bdver1", M_AMDFAM15H_BDVER1},
35405 {"bdver2", M_AMDFAM15H_BDVER2},
35406 {"bdver3", M_AMDFAM15H_BDVER3},
35407 {"bdver4", M_AMDFAM15H_BDVER4},
35408 {"btver2", M_AMD_BTVER2},
35411 static struct _isa_names_table
35413 const char *const name;
35414 const enum processor_features feature;
35416 const isa_names_table[] =
35418 {"cmov", F_CMOV},
35419 {"mmx", F_MMX},
35420 {"popcnt", F_POPCNT},
35421 {"sse", F_SSE},
35422 {"sse2", F_SSE2},
35423 {"sse3", F_SSE3},
35424 {"ssse3", F_SSSE3},
35425 {"sse4a", F_SSE4_A},
35426 {"sse4.1", F_SSE4_1},
35427 {"sse4.2", F_SSE4_2},
35428 {"avx", F_AVX},
35429 {"fma4", F_FMA4},
35430 {"xop", F_XOP},
35431 {"fma", F_FMA},
35432 {"avx2", F_AVX2},
35433 {"avx512f",F_AVX512F},
35434 {"bmi", F_BMI},
35435 {"bmi2", F_BMI2}
35438 tree __processor_model_type = build_processor_model_struct ();
35439 tree __cpu_model_var = make_var_decl (__processor_model_type,
35440 "__cpu_model");
35443 varpool_node::add (__cpu_model_var);
35445 gcc_assert ((args != NULL) && (*args != NULL));
35447 param_string_cst = *args;
35448 while (param_string_cst
35449 && TREE_CODE (param_string_cst) != STRING_CST)
35451 /* *args must be an expr that can contain other EXPRs leading to a
35452 STRING_CST. */
35453 if (!EXPR_P (param_string_cst))
35455 error ("Parameter to builtin must be a string constant or literal");
35456 return integer_zero_node;
35458 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35461 gcc_assert (param_string_cst);
35463 if (fn_code == IX86_BUILTIN_CPU_IS)
35465 tree ref;
35466 tree field;
35467 tree final;
35469 unsigned int field_val = 0;
35470 unsigned int NUM_ARCH_NAMES
35471 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35473 for (i = 0; i < NUM_ARCH_NAMES; i++)
35474 if (strcmp (arch_names_table[i].name,
35475 TREE_STRING_POINTER (param_string_cst)) == 0)
35476 break;
35478 if (i == NUM_ARCH_NAMES)
35480 error ("Parameter to builtin not valid: %s",
35481 TREE_STRING_POINTER (param_string_cst));
35482 return integer_zero_node;
35485 field = TYPE_FIELDS (__processor_model_type);
35486 field_val = arch_names_table[i].model;
35488 /* CPU types are stored in the next field. */
35489 if (field_val > M_CPU_TYPE_START
35490 && field_val < M_CPU_SUBTYPE_START)
35492 field = DECL_CHAIN (field);
35493 field_val -= M_CPU_TYPE_START;
35496 /* CPU subtypes are stored in the next field. */
35497 if (field_val > M_CPU_SUBTYPE_START)
35499 field = DECL_CHAIN ( DECL_CHAIN (field));
35500 field_val -= M_CPU_SUBTYPE_START;
35503 /* Get the appropriate field in __cpu_model. */
35504 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35505 field, NULL_TREE);
35507 /* Check the value. */
35508 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35509 build_int_cstu (unsigned_type_node, field_val));
35510 return build1 (CONVERT_EXPR, integer_type_node, final);
35512 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35514 tree ref;
35515 tree array_elt;
35516 tree field;
35517 tree final;
35519 unsigned int field_val = 0;
35520 unsigned int NUM_ISA_NAMES
35521 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35523 for (i = 0; i < NUM_ISA_NAMES; i++)
35524 if (strcmp (isa_names_table[i].name,
35525 TREE_STRING_POINTER (param_string_cst)) == 0)
35526 break;
35528 if (i == NUM_ISA_NAMES)
35530 error ("Parameter to builtin not valid: %s",
35531 TREE_STRING_POINTER (param_string_cst));
35532 return integer_zero_node;
35535 field = TYPE_FIELDS (__processor_model_type);
35536 /* Get the last field, which is __cpu_features. */
35537 while (DECL_CHAIN (field))
35538 field = DECL_CHAIN (field);
35540 /* Get the appropriate field: __cpu_model.__cpu_features */
35541 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35542 field, NULL_TREE);
35544 /* Access the 0th element of __cpu_features array. */
35545 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35546 integer_zero_node, NULL_TREE, NULL_TREE);
35548 field_val = (1 << isa_names_table[i].feature);
35549 /* Return __cpu_model.__cpu_features[0] & field_val */
35550 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35551 build_int_cstu (unsigned_type_node, field_val));
35552 return build1 (CONVERT_EXPR, integer_type_node, final);
35554 gcc_unreachable ();
35557 static tree
35558 ix86_fold_builtin (tree fndecl, int n_args,
35559 tree *args, bool ignore ATTRIBUTE_UNUSED)
35561 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35563 enum ix86_builtins fn_code = (enum ix86_builtins)
35564 DECL_FUNCTION_CODE (fndecl);
35565 if (fn_code == IX86_BUILTIN_CPU_IS
35566 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35568 gcc_assert (n_args == 1);
35569 return fold_builtin_cpu (fndecl, args);
35573 #ifdef SUBTARGET_FOLD_BUILTIN
35574 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35575 #endif
35577 return NULL_TREE;
35580 /* Make builtins to detect cpu type and features supported. NAME is
35581 the builtin name, CODE is the builtin code, and FTYPE is the function
35582 type of the builtin. */
35584 static void
35585 make_cpu_type_builtin (const char* name, int code,
35586 enum ix86_builtin_func_type ftype, bool is_const)
35588 tree decl;
35589 tree type;
35591 type = ix86_get_builtin_func_type (ftype);
35592 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35593 NULL, NULL_TREE);
35594 gcc_assert (decl != NULL_TREE);
35595 ix86_builtins[(int) code] = decl;
35596 TREE_READONLY (decl) = is_const;
35599 /* Make builtins to get CPU type and features supported. The created
35600 builtins are:
35602 __builtin_cpu_init (), to detect cpu type and features,
35603 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35604 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
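   A typical use in user code is, for instance:

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("corei7")) { ... }
     if (__builtin_cpu_supports ("avx2")) { ... }

   (Illustrative snippet; the accepted strings are those listed in
   arch_names_table and isa_names_table in fold_builtin_cpu.)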
35607 static void
35608 ix86_init_platform_type_builtins (void)
35610 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35611 INT_FTYPE_VOID, false);
35612 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35613 INT_FTYPE_PCCHAR, true);
35614 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35615 INT_FTYPE_PCCHAR, true);
35618 /* Internal method for ix86_init_builtins. */
35620 static void
35621 ix86_init_builtins_va_builtins_abi (void)
35623 tree ms_va_ref, sysv_va_ref;
35624 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35625 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35626 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35627 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35629 if (!TARGET_64BIT)
35630 return;
35631 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35632 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35633 ms_va_ref = build_reference_type (ms_va_list_type_node);
35634 sysv_va_ref =
35635 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35637 fnvoid_va_end_ms =
35638 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35639 fnvoid_va_start_ms =
35640 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35641 fnvoid_va_end_sysv =
35642 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35643 fnvoid_va_start_sysv =
35644 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35645 NULL_TREE);
35646 fnvoid_va_copy_ms =
35647 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35648 NULL_TREE);
35649 fnvoid_va_copy_sysv =
35650 build_function_type_list (void_type_node, sysv_va_ref,
35651 sysv_va_ref, NULL_TREE);
35653 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35654 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35655 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35656 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35657 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35658 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35659 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35660 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35661 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35662 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35663 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35664 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35667 static void
35668 ix86_init_builtin_types (void)
35670 tree float128_type_node, float80_type_node;
35672 /* The __float80 type. */
35673 float80_type_node = long_double_type_node;
35674 if (TYPE_MODE (float80_type_node) != XFmode)
35676 /* The __float80 type. */
35677 float80_type_node = make_node (REAL_TYPE);
35679 TYPE_PRECISION (float80_type_node) = 80;
35680 layout_type (float80_type_node);
35682 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35684 /* The __float128 type. */
35685 float128_type_node = make_node (REAL_TYPE);
35686 TYPE_PRECISION (float128_type_node) = 128;
35687 layout_type (float128_type_node);
35688 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35690 /* This macro is built by i386-builtin-types.awk. */
35691 DEFINE_BUILTIN_PRIMITIVE_TYPES;
35694 static void
35695 ix86_init_builtins (void)
35697 tree t;
35699 ix86_init_builtin_types ();
35701 /* Builtins to get CPU type and features. */
35702 ix86_init_platform_type_builtins ();
35704 /* TFmode support builtins. */
35705 def_builtin_const (0, "__builtin_infq",
35706 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35707 def_builtin_const (0, "__builtin_huge_valq",
35708 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35710 /* We will expand them to normal call if SSE isn't available since
35711 they are used by libgcc. */
35712 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35713 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35714 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35715 TREE_READONLY (t) = 1;
35716 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35718 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35719 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35720 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35721 TREE_READONLY (t) = 1;
35722 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35724 ix86_init_tm_builtins ();
35725 ix86_init_mmx_sse_builtins ();
35726 ix86_init_mpx_builtins ();
35728 if (TARGET_LP64)
35729 ix86_init_builtins_va_builtins_abi ();
35731 #ifdef SUBTARGET_INIT_BUILTINS
35732 SUBTARGET_INIT_BUILTINS;
35733 #endif
35736 /* Return the ix86 builtin for CODE. */
35738 static tree
35739 ix86_builtin_decl (unsigned code, bool)
35741 if (code >= IX86_BUILTIN_MAX)
35742 return error_mark_node;
35744 return ix86_builtins[code];
35747 /* Errors in the source file can cause expand_expr to return const0_rtx
35748 where we expect a vector. To avoid crashing, use one of the vector
35749 clear instructions. */
35750 static rtx
35751 safe_vector_operand (rtx x, machine_mode mode)
35753 if (x == const0_rtx)
35754 x = CONST0_RTX (mode);
35755 return x;
35758 /* Fixup modeless constants to fit required mode. */
35759 static rtx
35760 fixup_modeless_constant (rtx x, machine_mode mode)
35762 if (GET_MODE (x) == VOIDmode)
35763 x = convert_to_mode (mode, x, 1);
35764 return x;
35767 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
35769 static rtx
35770 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35772 rtx pat;
35773 tree arg0 = CALL_EXPR_ARG (exp, 0);
35774 tree arg1 = CALL_EXPR_ARG (exp, 1);
35775 rtx op0 = expand_normal (arg0);
35776 rtx op1 = expand_normal (arg1);
35777 machine_mode tmode = insn_data[icode].operand[0].mode;
35778 machine_mode mode0 = insn_data[icode].operand[1].mode;
35779 machine_mode mode1 = insn_data[icode].operand[2].mode;
35781 if (VECTOR_MODE_P (mode0))
35782 op0 = safe_vector_operand (op0, mode0);
35783 if (VECTOR_MODE_P (mode1))
35784 op1 = safe_vector_operand (op1, mode1);
35786 if (optimize || !target
35787 || GET_MODE (target) != tmode
35788 || !insn_data[icode].operand[0].predicate (target, tmode))
35789 target = gen_reg_rtx (tmode);
35791 if (GET_MODE (op1) == SImode && mode1 == TImode)
35793 rtx x = gen_reg_rtx (V4SImode);
35794 emit_insn (gen_sse2_loadd (x, op1));
35795 op1 = gen_lowpart (TImode, x);
35798 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35799 op0 = copy_to_mode_reg (mode0, op0);
35800 if (!insn_data[icode].operand[2].predicate (op1, mode1))
35801 op1 = copy_to_mode_reg (mode1, op1);
35803 pat = GEN_FCN (icode) (target, op0, op1);
35804 if (! pat)
35805 return 0;
35807 emit_insn (pat);
35809 return target;
35812 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
35814 static rtx
35815 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
35816 enum ix86_builtin_func_type m_type,
35817 enum rtx_code sub_code)
35819 rtx pat;
35820 int i;
35821 int nargs;
35822 bool comparison_p = false;
35823 bool tf_p = false;
35824 bool last_arg_constant = false;
35825 int num_memory = 0;
35826 struct {
35827 rtx op;
35828 machine_mode mode;
35829 } args[4];
35831 machine_mode tmode = insn_data[icode].operand[0].mode;
35833 switch (m_type)
35835 case MULTI_ARG_4_DF2_DI_I:
35836 case MULTI_ARG_4_DF2_DI_I1:
35837 case MULTI_ARG_4_SF2_SI_I:
35838 case MULTI_ARG_4_SF2_SI_I1:
35839 nargs = 4;
35840 last_arg_constant = true;
35841 break;
35843 case MULTI_ARG_3_SF:
35844 case MULTI_ARG_3_DF:
35845 case MULTI_ARG_3_SF2:
35846 case MULTI_ARG_3_DF2:
35847 case MULTI_ARG_3_DI:
35848 case MULTI_ARG_3_SI:
35849 case MULTI_ARG_3_SI_DI:
35850 case MULTI_ARG_3_HI:
35851 case MULTI_ARG_3_HI_SI:
35852 case MULTI_ARG_3_QI:
35853 case MULTI_ARG_3_DI2:
35854 case MULTI_ARG_3_SI2:
35855 case MULTI_ARG_3_HI2:
35856 case MULTI_ARG_3_QI2:
35857 nargs = 3;
35858 break;
35860 case MULTI_ARG_2_SF:
35861 case MULTI_ARG_2_DF:
35862 case MULTI_ARG_2_DI:
35863 case MULTI_ARG_2_SI:
35864 case MULTI_ARG_2_HI:
35865 case MULTI_ARG_2_QI:
35866 nargs = 2;
35867 break;
35869 case MULTI_ARG_2_DI_IMM:
35870 case MULTI_ARG_2_SI_IMM:
35871 case MULTI_ARG_2_HI_IMM:
35872 case MULTI_ARG_2_QI_IMM:
35873 nargs = 2;
35874 last_arg_constant = true;
35875 break;
35877 case MULTI_ARG_1_SF:
35878 case MULTI_ARG_1_DF:
35879 case MULTI_ARG_1_SF2:
35880 case MULTI_ARG_1_DF2:
35881 case MULTI_ARG_1_DI:
35882 case MULTI_ARG_1_SI:
35883 case MULTI_ARG_1_HI:
35884 case MULTI_ARG_1_QI:
35885 case MULTI_ARG_1_SI_DI:
35886 case MULTI_ARG_1_HI_DI:
35887 case MULTI_ARG_1_HI_SI:
35888 case MULTI_ARG_1_QI_DI:
35889 case MULTI_ARG_1_QI_SI:
35890 case MULTI_ARG_1_QI_HI:
35891 nargs = 1;
35892 break;
35894 case MULTI_ARG_2_DI_CMP:
35895 case MULTI_ARG_2_SI_CMP:
35896 case MULTI_ARG_2_HI_CMP:
35897 case MULTI_ARG_2_QI_CMP:
35898 nargs = 2;
35899 comparison_p = true;
35900 break;
35902 case MULTI_ARG_2_SF_TF:
35903 case MULTI_ARG_2_DF_TF:
35904 case MULTI_ARG_2_DI_TF:
35905 case MULTI_ARG_2_SI_TF:
35906 case MULTI_ARG_2_HI_TF:
35907 case MULTI_ARG_2_QI_TF:
35908 nargs = 2;
35909 tf_p = true;
35910 break;
35912 default:
35913 gcc_unreachable ();
35916 if (optimize || !target
35917 || GET_MODE (target) != tmode
35918 || !insn_data[icode].operand[0].predicate (target, tmode))
35919 target = gen_reg_rtx (tmode);
35921 gcc_assert (nargs <= 4);
35923 for (i = 0; i < nargs; i++)
35925 tree arg = CALL_EXPR_ARG (exp, i);
35926 rtx op = expand_normal (arg);
35927 int adjust = (comparison_p) ? 1 : 0;
35928 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
35930 if (last_arg_constant && i == nargs - 1)
35932 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
35934 enum insn_code new_icode = icode;
35935 switch (icode)
35937 case CODE_FOR_xop_vpermil2v2df3:
35938 case CODE_FOR_xop_vpermil2v4sf3:
35939 case CODE_FOR_xop_vpermil2v4df3:
35940 case CODE_FOR_xop_vpermil2v8sf3:
35941 error ("the last argument must be a 2-bit immediate");
35942 return gen_reg_rtx (tmode);
35943 case CODE_FOR_xop_rotlv2di3:
35944 new_icode = CODE_FOR_rotlv2di3;
35945 goto xop_rotl;
35946 case CODE_FOR_xop_rotlv4si3:
35947 new_icode = CODE_FOR_rotlv4si3;
35948 goto xop_rotl;
35949 case CODE_FOR_xop_rotlv8hi3:
35950 new_icode = CODE_FOR_rotlv8hi3;
35951 goto xop_rotl;
35952 case CODE_FOR_xop_rotlv16qi3:
35953 new_icode = CODE_FOR_rotlv16qi3;
35954 xop_rotl:
35955 if (CONST_INT_P (op))
35957 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
35958 op = GEN_INT (INTVAL (op) & mask);
35959 gcc_checking_assert
35960 (insn_data[icode].operand[i + 1].predicate (op, mode));
35962 else
35964 gcc_checking_assert
35965 (nargs == 2
35966 && insn_data[new_icode].operand[0].mode == tmode
35967 && insn_data[new_icode].operand[1].mode == tmode
35968 && insn_data[new_icode].operand[2].mode == mode
35969 && insn_data[new_icode].operand[0].predicate
35970 == insn_data[icode].operand[0].predicate
35971 && insn_data[new_icode].operand[1].predicate
35972 == insn_data[icode].operand[1].predicate);
35973 icode = new_icode;
35974 goto non_constant;
35976 break;
35977 default:
35978 gcc_unreachable ();
35982 else
35984 non_constant:
35985 if (VECTOR_MODE_P (mode))
35986 op = safe_vector_operand (op, mode);
35988 /* If we aren't optimizing, only allow one memory operand to be
35989 generated. */
35990 if (memory_operand (op, mode))
35991 num_memory++;
35993 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
35995 if (optimize
35996 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
35997 || num_memory > 1)
35998 op = force_reg (mode, op);
36001 args[i].op = op;
36002 args[i].mode = mode;
36005 switch (nargs)
36007 case 1:
36008 pat = GEN_FCN (icode) (target, args[0].op);
36009 break;
36011 case 2:
36012 if (tf_p)
36013 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
36014 GEN_INT ((int)sub_code));
36015 else if (! comparison_p)
36016 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36017 else
36019 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
36020 args[0].op,
36021 args[1].op);
36023 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
36025 break;
36027 case 3:
36028 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36029 break;
36031 case 4:
36032 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
36033 break;
36035 default:
36036 gcc_unreachable ();
36039 if (! pat)
36040 return 0;
36042 emit_insn (pat);
36043 return target;
36046 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
36047 insns with vec_merge. */
36049 static rtx
36050 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
36051 rtx target)
36053 rtx pat;
36054 tree arg0 = CALL_EXPR_ARG (exp, 0);
36055 rtx op1, op0 = expand_normal (arg0);
36056 machine_mode tmode = insn_data[icode].operand[0].mode;
36057 machine_mode mode0 = insn_data[icode].operand[1].mode;
36059 if (optimize || !target
36060 || GET_MODE (target) != tmode
36061 || !insn_data[icode].operand[0].predicate (target, tmode))
36062 target = gen_reg_rtx (tmode);
36064 if (VECTOR_MODE_P (mode0))
36065 op0 = safe_vector_operand (op0, mode0);
36067 if ((optimize && !register_operand (op0, mode0))
36068 || !insn_data[icode].operand[1].predicate (op0, mode0))
36069 op0 = copy_to_mode_reg (mode0, op0);
36071 op1 = op0;
36072 if (!insn_data[icode].operand[2].predicate (op1, mode0))
36073 op1 = copy_to_mode_reg (mode0, op1);
36075 pat = GEN_FCN (icode) (target, op0, op1);
36076 if (! pat)
36077 return 0;
36078 emit_insn (pat);
36079 return target;
36082 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
36084 static rtx
36085 ix86_expand_sse_compare (const struct builtin_description *d,
36086 tree exp, rtx target, bool swap)
36088 rtx pat;
36089 tree arg0 = CALL_EXPR_ARG (exp, 0);
36090 tree arg1 = CALL_EXPR_ARG (exp, 1);
36091 rtx op0 = expand_normal (arg0);
36092 rtx op1 = expand_normal (arg1);
36093 rtx op2;
36094 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36095 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36096 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36097 enum rtx_code comparison = d->comparison;
36099 if (VECTOR_MODE_P (mode0))
36100 op0 = safe_vector_operand (op0, mode0);
36101 if (VECTOR_MODE_P (mode1))
36102 op1 = safe_vector_operand (op1, mode1);
36104 /* Swap operands if we have a comparison that isn't available in
36105 hardware. */
36106 if (swap)
36107 std::swap (op0, op1);
36109 if (optimize || !target
36110 || GET_MODE (target) != tmode
36111 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36112 target = gen_reg_rtx (tmode);
36114 if ((optimize && !register_operand (op0, mode0))
36115 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36116 op0 = copy_to_mode_reg (mode0, op0);
36117 if ((optimize && !register_operand (op1, mode1))
36118 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36119 op1 = copy_to_mode_reg (mode1, op1);
36121 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36122 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36123 if (! pat)
36124 return 0;
36125 emit_insn (pat);
36126 return target;
36129 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
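/* The comi result is produced by clearing an SImode pseudo, taking its
   QImode low part as a SUBREG, and setting that low part from D's
   comparison code applied to the flags register written by the comi
   insn; the SImode pseudo is what gets returned.  (Summary of the
   expansion below.)  */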
36131 static rtx
36132 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36133 rtx target)
36135 rtx pat;
36136 tree arg0 = CALL_EXPR_ARG (exp, 0);
36137 tree arg1 = CALL_EXPR_ARG (exp, 1);
36138 rtx op0 = expand_normal (arg0);
36139 rtx op1 = expand_normal (arg1);
36140 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36141 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36142 enum rtx_code comparison = d->comparison;
36144 if (VECTOR_MODE_P (mode0))
36145 op0 = safe_vector_operand (op0, mode0);
36146 if (VECTOR_MODE_P (mode1))
36147 op1 = safe_vector_operand (op1, mode1);
36149 /* Swap operands if we have a comparison that isn't available in
36150 hardware. */
36151 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36152 std::swap (op0, op1);
36154 target = gen_reg_rtx (SImode);
36155 emit_move_insn (target, const0_rtx);
36156 target = gen_rtx_SUBREG (QImode, target, 0);
36158 if ((optimize && !register_operand (op0, mode0))
36159 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36160 op0 = copy_to_mode_reg (mode0, op0);
36161 if ((optimize && !register_operand (op1, mode1))
36162 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36163 op1 = copy_to_mode_reg (mode1, op1);
36165 pat = GEN_FCN (d->icode) (op0, op1);
36166 if (! pat)
36167 return 0;
36168 emit_insn (pat);
36169 emit_insn (gen_rtx_SET (VOIDmode,
36170 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36171 gen_rtx_fmt_ee (comparison, QImode,
36172 SET_DEST (pat),
36173 const0_rtx)));
36175 return SUBREG_REG (target);
36178 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
36180 static rtx
36181 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36182 rtx target)
36184 rtx pat;
36185 tree arg0 = CALL_EXPR_ARG (exp, 0);
36186 rtx op1, op0 = expand_normal (arg0);
36187 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36188 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36190 if (optimize || target == 0
36191 || GET_MODE (target) != tmode
36192 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36193 target = gen_reg_rtx (tmode);
36195 if (VECTOR_MODE_P (mode0))
36196 op0 = safe_vector_operand (op0, mode0);
36198 if ((optimize && !register_operand (op0, mode0))
36199 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36200 op0 = copy_to_mode_reg (mode0, op0);
36202 op1 = GEN_INT (d->comparison);
36204 pat = GEN_FCN (d->icode) (target, op0, op1);
36205 if (! pat)
36206 return 0;
36207 emit_insn (pat);
36208 return target;
36211 static rtx
36212 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36213 tree exp, rtx target)
36215 rtx pat;
36216 tree arg0 = CALL_EXPR_ARG (exp, 0);
36217 tree arg1 = CALL_EXPR_ARG (exp, 1);
36218 rtx op0 = expand_normal (arg0);
36219 rtx op1 = expand_normal (arg1);
36220 rtx op2;
36221 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36222 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36223 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36225 if (optimize || target == 0
36226 || GET_MODE (target) != tmode
36227 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36228 target = gen_reg_rtx (tmode);
36230 op0 = safe_vector_operand (op0, mode0);
36231 op1 = safe_vector_operand (op1, mode1);
36233 if ((optimize && !register_operand (op0, mode0))
36234 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36235 op0 = copy_to_mode_reg (mode0, op0);
36236 if ((optimize && !register_operand (op1, mode1))
36237 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36238 op1 = copy_to_mode_reg (mode1, op1);
36240 op2 = GEN_INT (d->comparison);
36242 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36243 if (! pat)
36244 return 0;
36245 emit_insn (pat);
36246 return target;
36249 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
36251 static rtx
36252 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36253 rtx target)
36255 rtx pat;
36256 tree arg0 = CALL_EXPR_ARG (exp, 0);
36257 tree arg1 = CALL_EXPR_ARG (exp, 1);
36258 rtx op0 = expand_normal (arg0);
36259 rtx op1 = expand_normal (arg1);
36260 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36261 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36262 enum rtx_code comparison = d->comparison;
36264 if (VECTOR_MODE_P (mode0))
36265 op0 = safe_vector_operand (op0, mode0);
36266 if (VECTOR_MODE_P (mode1))
36267 op1 = safe_vector_operand (op1, mode1);
36269 target = gen_reg_rtx (SImode);
36270 emit_move_insn (target, const0_rtx);
36271 target = gen_rtx_SUBREG (QImode, target, 0);
36273 if ((optimize && !register_operand (op0, mode0))
36274 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36275 op0 = copy_to_mode_reg (mode0, op0);
36276 if ((optimize && !register_operand (op1, mode1))
36277 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36278 op1 = copy_to_mode_reg (mode1, op1);
36280 pat = GEN_FCN (d->icode) (op0, op1);
36281 if (! pat)
36282 return 0;
36283 emit_insn (pat);
36284 emit_insn (gen_rtx_SET (VOIDmode,
36285 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36286 gen_rtx_fmt_ee (comparison, QImode,
36287 SET_DEST (pat),
36288 const0_rtx)));
36290 return SUBREG_REG (target);
36293 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
36295 static rtx
36296 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36297 tree exp, rtx target)
36299 rtx pat;
36300 tree arg0 = CALL_EXPR_ARG (exp, 0);
36301 tree arg1 = CALL_EXPR_ARG (exp, 1);
36302 tree arg2 = CALL_EXPR_ARG (exp, 2);
36303 tree arg3 = CALL_EXPR_ARG (exp, 3);
36304 tree arg4 = CALL_EXPR_ARG (exp, 4);
36305 rtx scratch0, scratch1;
36306 rtx op0 = expand_normal (arg0);
36307 rtx op1 = expand_normal (arg1);
36308 rtx op2 = expand_normal (arg2);
36309 rtx op3 = expand_normal (arg3);
36310 rtx op4 = expand_normal (arg4);
36311 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36313 tmode0 = insn_data[d->icode].operand[0].mode;
36314 tmode1 = insn_data[d->icode].operand[1].mode;
36315 modev2 = insn_data[d->icode].operand[2].mode;
36316 modei3 = insn_data[d->icode].operand[3].mode;
36317 modev4 = insn_data[d->icode].operand[4].mode;
36318 modei5 = insn_data[d->icode].operand[5].mode;
36319 modeimm = insn_data[d->icode].operand[6].mode;
36321 if (VECTOR_MODE_P (modev2))
36322 op0 = safe_vector_operand (op0, modev2);
36323 if (VECTOR_MODE_P (modev4))
36324 op2 = safe_vector_operand (op2, modev4);
36326 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36327 op0 = copy_to_mode_reg (modev2, op0);
36328 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36329 op1 = copy_to_mode_reg (modei3, op1);
36330 if ((optimize && !register_operand (op2, modev4))
36331 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36332 op2 = copy_to_mode_reg (modev4, op2);
36333 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36334 op3 = copy_to_mode_reg (modei5, op3);
36336 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36338 error ("the fifth argument must be an 8-bit immediate");
36339 return const0_rtx;
36342 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36344 if (optimize || !target
36345 || GET_MODE (target) != tmode0
36346 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36347 target = gen_reg_rtx (tmode0);
36349 scratch1 = gen_reg_rtx (tmode1);
36351 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36353 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36355 if (optimize || !target
36356 || GET_MODE (target) != tmode1
36357 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36358 target = gen_reg_rtx (tmode1);
36360 scratch0 = gen_reg_rtx (tmode0);
36362 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36364 else
36366 gcc_assert (d->flag);
36368 scratch0 = gen_reg_rtx (tmode0);
36369 scratch1 = gen_reg_rtx (tmode1);
36371 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36374 if (! pat)
36375 return 0;
36377 emit_insn (pat);
36379 if (d->flag)
36381 target = gen_reg_rtx (SImode);
36382 emit_move_insn (target, const0_rtx);
36383 target = gen_rtx_SUBREG (QImode, target, 0);
36385 emit_insn
36386 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36387 gen_rtx_fmt_ee (EQ, QImode,
36388 gen_rtx_REG ((machine_mode) d->flag,
36389 FLAGS_REG),
36390 const0_rtx)));
36391 return SUBREG_REG (target);
36393 else
36394 return target;
36398 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
36400 static rtx
36401 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36402 tree exp, rtx target)
36404 rtx pat;
36405 tree arg0 = CALL_EXPR_ARG (exp, 0);
36406 tree arg1 = CALL_EXPR_ARG (exp, 1);
36407 tree arg2 = CALL_EXPR_ARG (exp, 2);
36408 rtx scratch0, scratch1;
36409 rtx op0 = expand_normal (arg0);
36410 rtx op1 = expand_normal (arg1);
36411 rtx op2 = expand_normal (arg2);
36412 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36414 tmode0 = insn_data[d->icode].operand[0].mode;
36415 tmode1 = insn_data[d->icode].operand[1].mode;
36416 modev2 = insn_data[d->icode].operand[2].mode;
36417 modev3 = insn_data[d->icode].operand[3].mode;
36418 modeimm = insn_data[d->icode].operand[4].mode;
36420 if (VECTOR_MODE_P (modev2))
36421 op0 = safe_vector_operand (op0, modev2);
36422 if (VECTOR_MODE_P (modev3))
36423 op1 = safe_vector_operand (op1, modev3);
36425 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36426 op0 = copy_to_mode_reg (modev2, op0);
36427 if ((optimize && !register_operand (op1, modev3))
36428 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36429 op1 = copy_to_mode_reg (modev3, op1);
36431 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36433 error ("the third argument must be an 8-bit immediate");
36434 return const0_rtx;
36437 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36439 if (optimize || !target
36440 || GET_MODE (target) != tmode0
36441 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36442 target = gen_reg_rtx (tmode0);
36444 scratch1 = gen_reg_rtx (tmode1);
36446 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36448 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36450 if (optimize || !target
36451 || GET_MODE (target) != tmode1
36452 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36453 target = gen_reg_rtx (tmode1);
36455 scratch0 = gen_reg_rtx (tmode0);
36457 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36459 else
36461 gcc_assert (d->flag);
36463 scratch0 = gen_reg_rtx (tmode0);
36464 scratch1 = gen_reg_rtx (tmode1);
36466 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36469 if (! pat)
36470 return 0;
36472 emit_insn (pat);
36474 if (d->flag)
36476 target = gen_reg_rtx (SImode);
36477 emit_move_insn (target, const0_rtx);
36478 target = gen_rtx_SUBREG (QImode, target, 0);
36480 emit_insn
36481 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36482 gen_rtx_fmt_ee (EQ, QImode,
36483 gen_rtx_REG ((machine_mode) d->flag,
36484 FLAGS_REG),
36485 const0_rtx)));
36486 return SUBREG_REG (target);
36488 else
36489 return target;
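/* Illustrative note, not part of the original source: the pcmpestr/pcmpistr
   expanders above produce three result flavors.  IX86_BUILTIN_PCMPESTRI128 /
   IX86_BUILTIN_PCMPISTRI128 return the index register (tmode0),
   IX86_BUILTIN_PCMPESTRM128 / IX86_BUILTIN_PCMPISTRM128 return the mask
   register (tmode1), and the remaining flag-only builtins (d->flag != 0)
   test FLAGS_REG with EQ and return a 0/1 value.  A user-level sketch,
   assuming SSE4.2 and the smmintrin.h string-compare intrinsics:

     #include <smmintrin.h>

     int first_equal_ordered (__m128i needle, __m128i hay)
     {
       // the control must be an 8-bit immediate, as checked above
       return _mm_cmpistri (needle, hay, _SIDD_CMP_EQUAL_ORDERED);
     }
*/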
36492 /* Subroutine of ix86_expand_builtin to take care of insns with
36493 variable number of operands. */
36495 static rtx
36496 ix86_expand_args_builtin (const struct builtin_description *d,
36497 tree exp, rtx target)
36499 rtx pat, real_target;
36500 unsigned int i, nargs;
36501 unsigned int nargs_constant = 0;
36502 unsigned int mask_pos = 0;
36503 int num_memory = 0;
36504 struct
36506 rtx op;
36507 machine_mode mode;
36508 } args[6];
36509 bool last_arg_count = false;
36510 enum insn_code icode = d->icode;
36511 const struct insn_data_d *insn_p = &insn_data[icode];
36512 machine_mode tmode = insn_p->operand[0].mode;
36513 machine_mode rmode = VOIDmode;
36514 bool swap = false;
36515 enum rtx_code comparison = d->comparison;
36517 switch ((enum ix86_builtin_func_type) d->flag)
36519 case V2DF_FTYPE_V2DF_ROUND:
36520 case V4DF_FTYPE_V4DF_ROUND:
36521 case V4SF_FTYPE_V4SF_ROUND:
36522 case V8SF_FTYPE_V8SF_ROUND:
36523 case V4SI_FTYPE_V4SF_ROUND:
36524 case V8SI_FTYPE_V8SF_ROUND:
36525 return ix86_expand_sse_round (d, exp, target);
36526 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36527 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36528 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36529 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36530 case INT_FTYPE_V8SF_V8SF_PTEST:
36531 case INT_FTYPE_V4DI_V4DI_PTEST:
36532 case INT_FTYPE_V4DF_V4DF_PTEST:
36533 case INT_FTYPE_V4SF_V4SF_PTEST:
36534 case INT_FTYPE_V2DI_V2DI_PTEST:
36535 case INT_FTYPE_V2DF_V2DF_PTEST:
36536 return ix86_expand_sse_ptest (d, exp, target);
36537 case FLOAT128_FTYPE_FLOAT128:
36538 case FLOAT_FTYPE_FLOAT:
36539 case INT_FTYPE_INT:
36540 case UINT64_FTYPE_INT:
36541 case UINT16_FTYPE_UINT16:
36542 case INT64_FTYPE_INT64:
36543 case INT64_FTYPE_V4SF:
36544 case INT64_FTYPE_V2DF:
36545 case INT_FTYPE_V16QI:
36546 case INT_FTYPE_V8QI:
36547 case INT_FTYPE_V8SF:
36548 case INT_FTYPE_V4DF:
36549 case INT_FTYPE_V4SF:
36550 case INT_FTYPE_V2DF:
36551 case INT_FTYPE_V32QI:
36552 case V16QI_FTYPE_V16QI:
36553 case V8SI_FTYPE_V8SF:
36554 case V8SI_FTYPE_V4SI:
36555 case V8HI_FTYPE_V8HI:
36556 case V8HI_FTYPE_V16QI:
36557 case V8QI_FTYPE_V8QI:
36558 case V8SF_FTYPE_V8SF:
36559 case V8SF_FTYPE_V8SI:
36560 case V8SF_FTYPE_V4SF:
36561 case V8SF_FTYPE_V8HI:
36562 case V4SI_FTYPE_V4SI:
36563 case V4SI_FTYPE_V16QI:
36564 case V4SI_FTYPE_V4SF:
36565 case V4SI_FTYPE_V8SI:
36566 case V4SI_FTYPE_V8HI:
36567 case V4SI_FTYPE_V4DF:
36568 case V4SI_FTYPE_V2DF:
36569 case V4HI_FTYPE_V4HI:
36570 case V4DF_FTYPE_V4DF:
36571 case V4DF_FTYPE_V4SI:
36572 case V4DF_FTYPE_V4SF:
36573 case V4DF_FTYPE_V2DF:
36574 case V4SF_FTYPE_V4SF:
36575 case V4SF_FTYPE_V4SI:
36576 case V4SF_FTYPE_V8SF:
36577 case V4SF_FTYPE_V4DF:
36578 case V4SF_FTYPE_V8HI:
36579 case V4SF_FTYPE_V2DF:
36580 case V2DI_FTYPE_V2DI:
36581 case V2DI_FTYPE_V16QI:
36582 case V2DI_FTYPE_V8HI:
36583 case V2DI_FTYPE_V4SI:
36584 case V2DF_FTYPE_V2DF:
36585 case V2DF_FTYPE_V4SI:
36586 case V2DF_FTYPE_V4DF:
36587 case V2DF_FTYPE_V4SF:
36588 case V2DF_FTYPE_V2SI:
36589 case V2SI_FTYPE_V2SI:
36590 case V2SI_FTYPE_V4SF:
36591 case V2SI_FTYPE_V2SF:
36592 case V2SI_FTYPE_V2DF:
36593 case V2SF_FTYPE_V2SF:
36594 case V2SF_FTYPE_V2SI:
36595 case V32QI_FTYPE_V32QI:
36596 case V32QI_FTYPE_V16QI:
36597 case V16HI_FTYPE_V16HI:
36598 case V16HI_FTYPE_V8HI:
36599 case V8SI_FTYPE_V8SI:
36600 case V16HI_FTYPE_V16QI:
36601 case V8SI_FTYPE_V16QI:
36602 case V4DI_FTYPE_V16QI:
36603 case V8SI_FTYPE_V8HI:
36604 case V4DI_FTYPE_V8HI:
36605 case V4DI_FTYPE_V4SI:
36606 case V4DI_FTYPE_V2DI:
36607 case HI_FTYPE_HI:
36608 case HI_FTYPE_V16QI:
36609 case SI_FTYPE_V32QI:
36610 case DI_FTYPE_V64QI:
36611 case V16QI_FTYPE_HI:
36612 case V32QI_FTYPE_SI:
36613 case V64QI_FTYPE_DI:
36614 case V8HI_FTYPE_QI:
36615 case V16HI_FTYPE_HI:
36616 case V32HI_FTYPE_SI:
36617 case V4SI_FTYPE_QI:
36618 case V8SI_FTYPE_QI:
36619 case V4SI_FTYPE_HI:
36620 case V8SI_FTYPE_HI:
36621 case QI_FTYPE_V8HI:
36622 case HI_FTYPE_V16HI:
36623 case SI_FTYPE_V32HI:
36624 case QI_FTYPE_V4SI:
36625 case QI_FTYPE_V8SI:
36626 case HI_FTYPE_V16SI:
36627 case QI_FTYPE_V2DI:
36628 case QI_FTYPE_V4DI:
36629 case QI_FTYPE_V8DI:
36630 case UINT_FTYPE_V2DF:
36631 case UINT_FTYPE_V4SF:
36632 case UINT64_FTYPE_V2DF:
36633 case UINT64_FTYPE_V4SF:
36634 case V16QI_FTYPE_V8DI:
36635 case V16HI_FTYPE_V16SI:
36636 case V16SI_FTYPE_HI:
36637 case V2DI_FTYPE_QI:
36638 case V4DI_FTYPE_QI:
36639 case V16SI_FTYPE_V16SI:
36640 case V16SI_FTYPE_INT:
36641 case V16SF_FTYPE_FLOAT:
36642 case V16SF_FTYPE_V8SF:
36643 case V16SI_FTYPE_V8SI:
36644 case V16SF_FTYPE_V4SF:
36645 case V16SI_FTYPE_V4SI:
36646 case V16SF_FTYPE_V16SF:
36647 case V8HI_FTYPE_V8DI:
36648 case V8UHI_FTYPE_V8UHI:
36649 case V8SI_FTYPE_V8DI:
36650 case V8SF_FTYPE_V8DF:
36651 case V8DI_FTYPE_QI:
36652 case V8DI_FTYPE_INT64:
36653 case V8DI_FTYPE_V4DI:
36654 case V8DI_FTYPE_V8DI:
36655 case V8DF_FTYPE_DOUBLE:
36656 case V8DF_FTYPE_V4DF:
36657 case V8DF_FTYPE_V2DF:
36658 case V8DF_FTYPE_V8DF:
36659 case V8DF_FTYPE_V8SI:
36660 nargs = 1;
36661 break;
36662 case V4SF_FTYPE_V4SF_VEC_MERGE:
36663 case V2DF_FTYPE_V2DF_VEC_MERGE:
36664 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36665 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36666 case V16QI_FTYPE_V16QI_V16QI:
36667 case V16QI_FTYPE_V8HI_V8HI:
36668 case V16SI_FTYPE_V16SI_V16SI:
36669 case V16SF_FTYPE_V16SF_V16SF:
36670 case V16SF_FTYPE_V16SF_V16SI:
36671 case V8QI_FTYPE_V8QI_V8QI:
36672 case V8QI_FTYPE_V4HI_V4HI:
36673 case V8HI_FTYPE_V8HI_V8HI:
36674 case V8HI_FTYPE_V16QI_V16QI:
36675 case V8HI_FTYPE_V4SI_V4SI:
36676 case V8SF_FTYPE_V8SF_V8SF:
36677 case V8SF_FTYPE_V8SF_V8SI:
36678 case V8DI_FTYPE_V8DI_V8DI:
36679 case V8DF_FTYPE_V8DF_V8DF:
36680 case V8DF_FTYPE_V8DF_V8DI:
36681 case V4SI_FTYPE_V4SI_V4SI:
36682 case V4SI_FTYPE_V8HI_V8HI:
36683 case V4SI_FTYPE_V4SF_V4SF:
36684 case V4SI_FTYPE_V2DF_V2DF:
36685 case V4HI_FTYPE_V4HI_V4HI:
36686 case V4HI_FTYPE_V8QI_V8QI:
36687 case V4HI_FTYPE_V2SI_V2SI:
36688 case V4DF_FTYPE_V4DF_V4DF:
36689 case V4DF_FTYPE_V4DF_V4DI:
36690 case V4SF_FTYPE_V4SF_V4SF:
36691 case V4SF_FTYPE_V4SF_V4SI:
36692 case V4SF_FTYPE_V4SF_V2SI:
36693 case V4SF_FTYPE_V4SF_V2DF:
36694 case V4SF_FTYPE_V4SF_UINT:
36695 case V4SF_FTYPE_V4SF_UINT64:
36696 case V4SF_FTYPE_V4SF_DI:
36697 case V4SF_FTYPE_V4SF_SI:
36698 case V2DI_FTYPE_V2DI_V2DI:
36699 case V2DI_FTYPE_V16QI_V16QI:
36700 case V2DI_FTYPE_V4SI_V4SI:
36701 case V2UDI_FTYPE_V4USI_V4USI:
36702 case V2DI_FTYPE_V2DI_V16QI:
36703 case V2DI_FTYPE_V2DF_V2DF:
36704 case V2SI_FTYPE_V2SI_V2SI:
36705 case V2SI_FTYPE_V4HI_V4HI:
36706 case V2SI_FTYPE_V2SF_V2SF:
36707 case V2DF_FTYPE_V2DF_V2DF:
36708 case V2DF_FTYPE_V2DF_V4SF:
36709 case V2DF_FTYPE_V2DF_V2DI:
36710 case V2DF_FTYPE_V2DF_DI:
36711 case V2DF_FTYPE_V2DF_SI:
36712 case V2DF_FTYPE_V2DF_UINT:
36713 case V2DF_FTYPE_V2DF_UINT64:
36714 case V2SF_FTYPE_V2SF_V2SF:
36715 case V1DI_FTYPE_V1DI_V1DI:
36716 case V1DI_FTYPE_V8QI_V8QI:
36717 case V1DI_FTYPE_V2SI_V2SI:
36718 case V32QI_FTYPE_V16HI_V16HI:
36719 case V16HI_FTYPE_V8SI_V8SI:
36720 case V32QI_FTYPE_V32QI_V32QI:
36721 case V16HI_FTYPE_V32QI_V32QI:
36722 case V16HI_FTYPE_V16HI_V16HI:
36723 case V8SI_FTYPE_V4DF_V4DF:
36724 case V8SI_FTYPE_V8SI_V8SI:
36725 case V8SI_FTYPE_V16HI_V16HI:
36726 case V4DI_FTYPE_V4DI_V4DI:
36727 case V4DI_FTYPE_V8SI_V8SI:
36728 case V4UDI_FTYPE_V8USI_V8USI:
36729 case QI_FTYPE_V8DI_V8DI:
36730 case V8DI_FTYPE_V64QI_V64QI:
36731 case HI_FTYPE_V16SI_V16SI:
36732 if (comparison == UNKNOWN)
36733 return ix86_expand_binop_builtin (icode, exp, target);
36734 nargs = 2;
36735 break;
36736 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36737 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36738 gcc_assert (comparison != UNKNOWN);
36739 nargs = 2;
36740 swap = true;
36741 break;
36742 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36743 case V16HI_FTYPE_V16HI_SI_COUNT:
36744 case V8SI_FTYPE_V8SI_V4SI_COUNT:
36745 case V8SI_FTYPE_V8SI_SI_COUNT:
36746 case V4DI_FTYPE_V4DI_V2DI_COUNT:
36747 case V4DI_FTYPE_V4DI_INT_COUNT:
36748 case V8HI_FTYPE_V8HI_V8HI_COUNT:
36749 case V8HI_FTYPE_V8HI_SI_COUNT:
36750 case V4SI_FTYPE_V4SI_V4SI_COUNT:
36751 case V4SI_FTYPE_V4SI_SI_COUNT:
36752 case V4HI_FTYPE_V4HI_V4HI_COUNT:
36753 case V4HI_FTYPE_V4HI_SI_COUNT:
36754 case V2DI_FTYPE_V2DI_V2DI_COUNT:
36755 case V2DI_FTYPE_V2DI_SI_COUNT:
36756 case V2SI_FTYPE_V2SI_V2SI_COUNT:
36757 case V2SI_FTYPE_V2SI_SI_COUNT:
36758 case V1DI_FTYPE_V1DI_V1DI_COUNT:
36759 case V1DI_FTYPE_V1DI_SI_COUNT:
36760 nargs = 2;
36761 last_arg_count = true;
36762 break;
36763 case UINT64_FTYPE_UINT64_UINT64:
36764 case UINT_FTYPE_UINT_UINT:
36765 case UINT_FTYPE_UINT_USHORT:
36766 case UINT_FTYPE_UINT_UCHAR:
36767 case UINT16_FTYPE_UINT16_INT:
36768 case UINT8_FTYPE_UINT8_INT:
36769 case HI_FTYPE_HI_HI:
36770 case SI_FTYPE_SI_SI:
36771 case DI_FTYPE_DI_DI:
36772 case V16SI_FTYPE_V8DF_V8DF:
36773 nargs = 2;
36774 break;
36775 case V2DI_FTYPE_V2DI_INT_CONVERT:
36776 nargs = 2;
36777 rmode = V1TImode;
36778 nargs_constant = 1;
36779 break;
36780 case V4DI_FTYPE_V4DI_INT_CONVERT:
36781 nargs = 2;
36782 rmode = V2TImode;
36783 nargs_constant = 1;
36784 break;
36785 case V8DI_FTYPE_V8DI_INT_CONVERT:
36786 nargs = 2;
36787 rmode = V4TImode;
36788 nargs_constant = 1;
36789 break;
36790 case V8HI_FTYPE_V8HI_INT:
36791 case V8HI_FTYPE_V8SF_INT:
36792 case V16HI_FTYPE_V16SF_INT:
36793 case V8HI_FTYPE_V4SF_INT:
36794 case V8SF_FTYPE_V8SF_INT:
36795 case V4SF_FTYPE_V16SF_INT:
36796 case V16SF_FTYPE_V16SF_INT:
36797 case V4SI_FTYPE_V4SI_INT:
36798 case V4SI_FTYPE_V8SI_INT:
36799 case V4HI_FTYPE_V4HI_INT:
36800 case V4DF_FTYPE_V4DF_INT:
36801 case V4DF_FTYPE_V8DF_INT:
36802 case V4SF_FTYPE_V4SF_INT:
36803 case V4SF_FTYPE_V8SF_INT:
36804 case V2DI_FTYPE_V2DI_INT:
36805 case V2DF_FTYPE_V2DF_INT:
36806 case V2DF_FTYPE_V4DF_INT:
36807 case V16HI_FTYPE_V16HI_INT:
36808 case V8SI_FTYPE_V8SI_INT:
36809 case V16SI_FTYPE_V16SI_INT:
36810 case V4SI_FTYPE_V16SI_INT:
36811 case V4DI_FTYPE_V4DI_INT:
36812 case V2DI_FTYPE_V4DI_INT:
36813 case V4DI_FTYPE_V8DI_INT:
36814 case HI_FTYPE_HI_INT:
36815 case QI_FTYPE_V4SF_INT:
36816 case QI_FTYPE_V2DF_INT:
36817 nargs = 2;
36818 nargs_constant = 1;
36819 break;
36820 case V16QI_FTYPE_V16QI_V16QI_V16QI:
36821 case V8SF_FTYPE_V8SF_V8SF_V8SF:
36822 case V4DF_FTYPE_V4DF_V4DF_V4DF:
36823 case V4SF_FTYPE_V4SF_V4SF_V4SF:
36824 case V2DF_FTYPE_V2DF_V2DF_V2DF:
36825 case V32QI_FTYPE_V32QI_V32QI_V32QI:
36826 case HI_FTYPE_V16SI_V16SI_HI:
36827 case QI_FTYPE_V8DI_V8DI_QI:
36828 case V16HI_FTYPE_V16SI_V16HI_HI:
36829 case V16QI_FTYPE_V16SI_V16QI_HI:
36830 case V16QI_FTYPE_V8DI_V16QI_QI:
36831 case V16SF_FTYPE_V16SF_V16SF_HI:
36832 case V16SF_FTYPE_V16SF_V16SF_V16SF:
36833 case V16SF_FTYPE_V16SF_V16SI_V16SF:
36834 case V16SF_FTYPE_V16SI_V16SF_HI:
36835 case V16SF_FTYPE_V16SI_V16SF_V16SF:
36836 case V16SF_FTYPE_V4SF_V16SF_HI:
36837 case V16SI_FTYPE_SI_V16SI_HI:
36838 case V16SI_FTYPE_V16HI_V16SI_HI:
36839 case V16SI_FTYPE_V16QI_V16SI_HI:
36840 case V16SI_FTYPE_V16SF_V16SI_HI:
36841 case V8SF_FTYPE_V4SF_V8SF_QI:
36842 case V4DF_FTYPE_V2DF_V4DF_QI:
36843 case V8SI_FTYPE_V4SI_V8SI_QI:
36844 case V8SI_FTYPE_SI_V8SI_QI:
36845 case V4SI_FTYPE_V4SI_V4SI_QI:
36846 case V4SI_FTYPE_SI_V4SI_QI:
36847 case V4DI_FTYPE_V2DI_V4DI_QI:
36848 case V4DI_FTYPE_DI_V4DI_QI:
36849 case V2DI_FTYPE_V2DI_V2DI_QI:
36850 case V2DI_FTYPE_DI_V2DI_QI:
36851 case V64QI_FTYPE_V64QI_V64QI_DI:
36852 case V64QI_FTYPE_V16QI_V64QI_DI:
36853 case V64QI_FTYPE_QI_V64QI_DI:
36854 case V32QI_FTYPE_V32QI_V32QI_SI:
36855 case V32QI_FTYPE_V16QI_V32QI_SI:
36856 case V32QI_FTYPE_QI_V32QI_SI:
36857 case V16QI_FTYPE_V16QI_V16QI_HI:
36858 case V16QI_FTYPE_QI_V16QI_HI:
36859 case V32HI_FTYPE_V8HI_V32HI_SI:
36860 case V32HI_FTYPE_HI_V32HI_SI:
36861 case V16HI_FTYPE_V8HI_V16HI_HI:
36862 case V16HI_FTYPE_HI_V16HI_HI:
36863 case V8HI_FTYPE_V8HI_V8HI_QI:
36864 case V8HI_FTYPE_HI_V8HI_QI:
36865 case V8SF_FTYPE_V8HI_V8SF_QI:
36866 case V4SF_FTYPE_V8HI_V4SF_QI:
36867 case V8SI_FTYPE_V8SF_V8SI_QI:
36868 case V4SI_FTYPE_V4SF_V4SI_QI:
36869 case V8DI_FTYPE_V8SF_V8DI_QI:
36870 case V4DI_FTYPE_V4SF_V4DI_QI:
36871 case V2DI_FTYPE_V4SF_V2DI_QI:
36872 case V8SF_FTYPE_V8DI_V8SF_QI:
36873 case V4SF_FTYPE_V4DI_V4SF_QI:
36874 case V4SF_FTYPE_V2DI_V4SF_QI:
36875 case V8DF_FTYPE_V8DI_V8DF_QI:
36876 case V4DF_FTYPE_V4DI_V4DF_QI:
36877 case V2DF_FTYPE_V2DI_V2DF_QI:
36878 case V16QI_FTYPE_V8HI_V16QI_QI:
36879 case V16QI_FTYPE_V16HI_V16QI_HI:
36880 case V16QI_FTYPE_V4SI_V16QI_QI:
36881 case V16QI_FTYPE_V8SI_V16QI_QI:
36882 case V8HI_FTYPE_V4SI_V8HI_QI:
36883 case V8HI_FTYPE_V8SI_V8HI_QI:
36884 case V16QI_FTYPE_V2DI_V16QI_QI:
36885 case V16QI_FTYPE_V4DI_V16QI_QI:
36886 case V8HI_FTYPE_V2DI_V8HI_QI:
36887 case V8HI_FTYPE_V4DI_V8HI_QI:
36888 case V4SI_FTYPE_V2DI_V4SI_QI:
36889 case V4SI_FTYPE_V4DI_V4SI_QI:
36890 case V32QI_FTYPE_V32HI_V32QI_SI:
36891 case HI_FTYPE_V16QI_V16QI_HI:
36892 case SI_FTYPE_V32QI_V32QI_SI:
36893 case DI_FTYPE_V64QI_V64QI_DI:
36894 case QI_FTYPE_V8HI_V8HI_QI:
36895 case HI_FTYPE_V16HI_V16HI_HI:
36896 case SI_FTYPE_V32HI_V32HI_SI:
36897 case QI_FTYPE_V4SI_V4SI_QI:
36898 case QI_FTYPE_V8SI_V8SI_QI:
36899 case QI_FTYPE_V2DI_V2DI_QI:
36900 case QI_FTYPE_V4DI_V4DI_QI:
36901 case V4SF_FTYPE_V2DF_V4SF_QI:
36902 case V4SF_FTYPE_V4DF_V4SF_QI:
36903 case V16SI_FTYPE_V16SI_V16SI_HI:
36904 case V16SI_FTYPE_V16SI_V16SI_V16SI:
36905 case V16SI_FTYPE_V4SI_V16SI_HI:
36906 case V2DI_FTYPE_V2DI_V2DI_V2DI:
36907 case V2DI_FTYPE_V4SI_V2DI_QI:
36908 case V2DI_FTYPE_V8HI_V2DI_QI:
36909 case V2DI_FTYPE_V16QI_V2DI_QI:
36910 case V4DI_FTYPE_V4DI_V4DI_QI:
36911 case V4DI_FTYPE_V4SI_V4DI_QI:
36912 case V4DI_FTYPE_V8HI_V4DI_QI:
36913 case V4DI_FTYPE_V16QI_V4DI_QI:
36914 case V8DI_FTYPE_V8DF_V8DI_QI:
36915 case V4DI_FTYPE_V4DF_V4DI_QI:
36916 case V2DI_FTYPE_V2DF_V2DI_QI:
36917 case V4SI_FTYPE_V4DF_V4SI_QI:
36918 case V4SI_FTYPE_V2DF_V4SI_QI:
36919 case V4SI_FTYPE_V8HI_V4SI_QI:
36920 case V4SI_FTYPE_V16QI_V4SI_QI:
36921 case V8SI_FTYPE_V8SI_V8SI_V8SI:
36922 case V4DI_FTYPE_V4DI_V4DI_V4DI:
36923 case V8DF_FTYPE_V2DF_V8DF_QI:
36924 case V8DF_FTYPE_V4DF_V8DF_QI:
36925 case V8DF_FTYPE_V8DF_V8DF_QI:
36926 case V8DF_FTYPE_V8DF_V8DF_V8DF:
36927 case V8SF_FTYPE_V8SF_V8SF_QI:
36928 case V8SF_FTYPE_V8SI_V8SF_QI:
36929 case V4DF_FTYPE_V4DF_V4DF_QI:
36930 case V4SF_FTYPE_V4SF_V4SF_QI:
36931 case V2DF_FTYPE_V2DF_V2DF_QI:
36932 case V2DF_FTYPE_V4SF_V2DF_QI:
36933 case V2DF_FTYPE_V4SI_V2DF_QI:
36934 case V4SF_FTYPE_V4SI_V4SF_QI:
36935 case V4DF_FTYPE_V4SF_V4DF_QI:
36936 case V4DF_FTYPE_V4SI_V4DF_QI:
36937 case V8SI_FTYPE_V8SI_V8SI_QI:
36938 case V8SI_FTYPE_V8HI_V8SI_QI:
36939 case V8SI_FTYPE_V16QI_V8SI_QI:
36940 case V8DF_FTYPE_V8DF_V8DI_V8DF:
36941 case V8DF_FTYPE_V8DI_V8DF_V8DF:
36942 case V8DF_FTYPE_V8SF_V8DF_QI:
36943 case V8DF_FTYPE_V8SI_V8DF_QI:
36944 case V8DI_FTYPE_DI_V8DI_QI:
36945 case V16SF_FTYPE_V8SF_V16SF_HI:
36946 case V16SI_FTYPE_V8SI_V16SI_HI:
36947 case V16HI_FTYPE_V16HI_V16HI_HI:
36948 case V8HI_FTYPE_V16QI_V8HI_QI:
36949 case V16HI_FTYPE_V16QI_V16HI_HI:
36950 case V32HI_FTYPE_V32HI_V32HI_SI:
36951 case V32HI_FTYPE_V32QI_V32HI_SI:
36952 case V8DI_FTYPE_V16QI_V8DI_QI:
36953 case V8DI_FTYPE_V2DI_V8DI_QI:
36954 case V8DI_FTYPE_V4DI_V8DI_QI:
36955 case V8DI_FTYPE_V8DI_V8DI_QI:
36956 case V8DI_FTYPE_V8DI_V8DI_V8DI:
36957 case V8DI_FTYPE_V8HI_V8DI_QI:
36958 case V8DI_FTYPE_V8SI_V8DI_QI:
36959 case V8HI_FTYPE_V8DI_V8HI_QI:
36960 case V8SF_FTYPE_V8DF_V8SF_QI:
36961 case V8SI_FTYPE_V8DF_V8SI_QI:
36962 case V8SI_FTYPE_V8DI_V8SI_QI:
36963 case V4SI_FTYPE_V4SI_V4SI_V4SI:
36964 nargs = 3;
36965 break;
36966 case V32QI_FTYPE_V32QI_V32QI_INT:
36967 case V16HI_FTYPE_V16HI_V16HI_INT:
36968 case V16QI_FTYPE_V16QI_V16QI_INT:
36969 case V4DI_FTYPE_V4DI_V4DI_INT:
36970 case V8HI_FTYPE_V8HI_V8HI_INT:
36971 case V8SI_FTYPE_V8SI_V8SI_INT:
36972 case V8SI_FTYPE_V8SI_V4SI_INT:
36973 case V8SF_FTYPE_V8SF_V8SF_INT:
36974 case V8SF_FTYPE_V8SF_V4SF_INT:
36975 case V4SI_FTYPE_V4SI_V4SI_INT:
36976 case V4DF_FTYPE_V4DF_V4DF_INT:
36977 case V16SF_FTYPE_V16SF_V16SF_INT:
36978 case V16SF_FTYPE_V16SF_V4SF_INT:
36979 case V16SI_FTYPE_V16SI_V4SI_INT:
36980 case V4DF_FTYPE_V4DF_V2DF_INT:
36981 case V4SF_FTYPE_V4SF_V4SF_INT:
36982 case V2DI_FTYPE_V2DI_V2DI_INT:
36983 case V4DI_FTYPE_V4DI_V2DI_INT:
36984 case V2DF_FTYPE_V2DF_V2DF_INT:
36985 case QI_FTYPE_V8DI_V8DI_INT:
36986 case QI_FTYPE_V8DF_V8DF_INT:
36987 case QI_FTYPE_V2DF_V2DF_INT:
36988 case QI_FTYPE_V4SF_V4SF_INT:
36989 case HI_FTYPE_V16SI_V16SI_INT:
36990 case HI_FTYPE_V16SF_V16SF_INT:
36991 nargs = 3;
36992 nargs_constant = 1;
36993 break;
36994 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
36995 nargs = 3;
36996 rmode = V4DImode;
36997 nargs_constant = 1;
36998 break;
36999 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
37000 nargs = 3;
37001 rmode = V2DImode;
37002 nargs_constant = 1;
37003 break;
37004 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
37005 nargs = 3;
37006 rmode = DImode;
37007 nargs_constant = 1;
37008 break;
37009 case V2DI_FTYPE_V2DI_UINT_UINT:
37010 nargs = 3;
37011 nargs_constant = 2;
37012 break;
37013 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
37014 nargs = 3;
37015 rmode = V8DImode;
37016 nargs_constant = 1;
37017 break;
37018 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
37019 nargs = 5;
37020 rmode = V8DImode;
37021 mask_pos = 2;
37022 nargs_constant = 1;
37023 break;
37024 case QI_FTYPE_V8DF_INT_QI:
37025 case QI_FTYPE_V4DF_INT_QI:
37026 case QI_FTYPE_V2DF_INT_QI:
37027 case HI_FTYPE_V16SF_INT_HI:
37028 case QI_FTYPE_V8SF_INT_QI:
37029 case QI_FTYPE_V4SF_INT_QI:
37030 nargs = 3;
37031 mask_pos = 1;
37032 nargs_constant = 1;
37033 break;
37034 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
37035 nargs = 5;
37036 rmode = V4DImode;
37037 mask_pos = 2;
37038 nargs_constant = 1;
37039 break;
37040 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
37041 nargs = 5;
37042 rmode = V2DImode;
37043 mask_pos = 2;
37044 nargs_constant = 1;
37045 break;
37046 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
37047 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
37048 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
37049 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
37050 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
37051 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
37052 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
37053 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
37054 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
37055 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
37056 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
37057 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
37058 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
37059 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
37060 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
37061 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
37062 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
37063 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
37064 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
37065 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
37066 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
37067 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
37068 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
37069 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
37070 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
37071 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
37072 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
37073 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
37074 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
37075 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
37076 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
37077 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
37078 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
37079 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
37080 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
37081 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
37082 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
37083 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
37084 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
37085 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
37086 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
37087 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
37088 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
37089 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
37090 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
37091 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
37092 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
37093 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
37094 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
37095 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
37096 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
37097 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
37098 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
37099 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
37100 nargs = 4;
37101 break;
37102 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37103 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37104 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37105 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37106 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37107 nargs = 4;
37108 nargs_constant = 1;
37109 break;
37110 case QI_FTYPE_V4DI_V4DI_INT_QI:
37111 case QI_FTYPE_V8SI_V8SI_INT_QI:
37112 case QI_FTYPE_V4DF_V4DF_INT_QI:
37113 case QI_FTYPE_V8SF_V8SF_INT_QI:
37114 case QI_FTYPE_V2DI_V2DI_INT_QI:
37115 case QI_FTYPE_V4SI_V4SI_INT_QI:
37116 case QI_FTYPE_V2DF_V2DF_INT_QI:
37117 case QI_FTYPE_V4SF_V4SF_INT_QI:
37118 case DI_FTYPE_V64QI_V64QI_INT_DI:
37119 case SI_FTYPE_V32QI_V32QI_INT_SI:
37120 case HI_FTYPE_V16QI_V16QI_INT_HI:
37121 case SI_FTYPE_V32HI_V32HI_INT_SI:
37122 case HI_FTYPE_V16HI_V16HI_INT_HI:
37123 case QI_FTYPE_V8HI_V8HI_INT_QI:
37124 nargs = 4;
37125 mask_pos = 1;
37126 nargs_constant = 1;
37127 break;
37128 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37129 nargs = 4;
37130 nargs_constant = 2;
37131 break;
37132 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37133 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37134 nargs = 4;
37135 break;
37136 case QI_FTYPE_V8DI_V8DI_INT_QI:
37137 case HI_FTYPE_V16SI_V16SI_INT_HI:
37138 case QI_FTYPE_V8DF_V8DF_INT_QI:
37139 case HI_FTYPE_V16SF_V16SF_INT_HI:
37140 mask_pos = 1;
37141 nargs = 4;
37142 nargs_constant = 1;
37143 break;
37144 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37145 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37146 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37147 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37148 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37149 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37150 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37151 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37152 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37153 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37154 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37155 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37156 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37157 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37158 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37159 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37160 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37161 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37162 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37163 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37164 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37165 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37166 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37167 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37168 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37169 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37170 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37171 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37172 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37173 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37174 nargs = 4;
37175 mask_pos = 2;
37176 nargs_constant = 1;
37177 break;
37178 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37179 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37180 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37181 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37182 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37183 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37184 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37185 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37186 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37187 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37188 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37189 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37190 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37191 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37192 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37193 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37194 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37195 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37196 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37197 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37198 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37199 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37200 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37201 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37202 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37203 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37204 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37205 nargs = 5;
37206 mask_pos = 2;
37207 nargs_constant = 1;
37208 break;
37209 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37210 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37211 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37212 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37213 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37214 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37215 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37216 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37217 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37218 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37219 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37220 nargs = 5;
37222 mask_pos = 1;
37223 nargs_constant = 1;
37224 break;
37226 default:
37227 gcc_unreachable ();
37230 gcc_assert (nargs <= ARRAY_SIZE (args));
37232 if (comparison != UNKNOWN)
37234 gcc_assert (nargs == 2);
37235 return ix86_expand_sse_compare (d, exp, target, swap);
37238 if (rmode == VOIDmode || rmode == tmode)
37240 if (optimize
37241 || target == 0
37242 || GET_MODE (target) != tmode
37243 || !insn_p->operand[0].predicate (target, tmode))
37244 target = gen_reg_rtx (tmode);
37245 real_target = target;
37247 else
37249 real_target = gen_reg_rtx (tmode);
37250 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
37253 for (i = 0; i < nargs; i++)
37255 tree arg = CALL_EXPR_ARG (exp, i);
37256 rtx op = expand_normal (arg);
37257 machine_mode mode = insn_p->operand[i + 1].mode;
37258 bool match = insn_p->operand[i + 1].predicate (op, mode);
37260 if (last_arg_count && (i + 1) == nargs)
37262 /* SIMD shift insns take either an 8-bit immediate or a
37263 register as the count, but the builtin functions take an int
37264 as the count. If the count doesn't match, we put it in a register. */
37265 if (!match)
37267 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37268 if (!insn_p->operand[i + 1].predicate (op, mode))
37269 op = copy_to_reg (op);
37272 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
37273 || (!mask_pos && (nargs - i) <= nargs_constant))
37275 if (!match)
37276 switch (icode)
37278 case CODE_FOR_avx_vinsertf128v4di:
37279 case CODE_FOR_avx_vextractf128v4di:
37280 error ("the last argument must be a 1-bit immediate");
37281 return const0_rtx;
37283 case CODE_FOR_avx512f_cmpv8di3_mask:
37284 case CODE_FOR_avx512f_cmpv16si3_mask:
37285 case CODE_FOR_avx512f_ucmpv8di3_mask:
37286 case CODE_FOR_avx512f_ucmpv16si3_mask:
37287 case CODE_FOR_avx512vl_cmpv4di3_mask:
37288 case CODE_FOR_avx512vl_cmpv8si3_mask:
37289 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37290 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37291 case CODE_FOR_avx512vl_cmpv2di3_mask:
37292 case CODE_FOR_avx512vl_cmpv4si3_mask:
37293 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37294 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37295 error ("the last argument must be a 3-bit immediate");
37296 return const0_rtx;
37298 case CODE_FOR_sse4_1_roundsd:
37299 case CODE_FOR_sse4_1_roundss:
37301 case CODE_FOR_sse4_1_roundpd:
37302 case CODE_FOR_sse4_1_roundps:
37303 case CODE_FOR_avx_roundpd256:
37304 case CODE_FOR_avx_roundps256:
37306 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37307 case CODE_FOR_sse4_1_roundps_sfix:
37308 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37309 case CODE_FOR_avx_roundps_sfix256:
37311 case CODE_FOR_sse4_1_blendps:
37312 case CODE_FOR_avx_blendpd256:
37313 case CODE_FOR_avx_vpermilv4df:
37314 case CODE_FOR_avx_vpermilv4df_mask:
37315 case CODE_FOR_avx512f_getmantv8df_mask:
37316 case CODE_FOR_avx512f_getmantv16sf_mask:
37317 case CODE_FOR_avx512vl_getmantv8sf_mask:
37318 case CODE_FOR_avx512vl_getmantv4df_mask:
37319 case CODE_FOR_avx512vl_getmantv4sf_mask:
37320 case CODE_FOR_avx512vl_getmantv2df_mask:
37321 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37322 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37323 case CODE_FOR_avx512dq_rangepv4df_mask:
37324 case CODE_FOR_avx512dq_rangepv8sf_mask:
37325 case CODE_FOR_avx512dq_rangepv2df_mask:
37326 case CODE_FOR_avx512dq_rangepv4sf_mask:
37327 case CODE_FOR_avx_shufpd256_mask:
37328 error ("the last argument must be a 4-bit immediate");
37329 return const0_rtx;
37331 case CODE_FOR_sha1rnds4:
37332 case CODE_FOR_sse4_1_blendpd:
37333 case CODE_FOR_avx_vpermilv2df:
37334 case CODE_FOR_avx_vpermilv2df_mask:
37335 case CODE_FOR_xop_vpermil2v2df3:
37336 case CODE_FOR_xop_vpermil2v4sf3:
37337 case CODE_FOR_xop_vpermil2v4df3:
37338 case CODE_FOR_xop_vpermil2v8sf3:
37339 case CODE_FOR_avx512f_vinsertf32x4_mask:
37340 case CODE_FOR_avx512f_vinserti32x4_mask:
37341 case CODE_FOR_avx512f_vextractf32x4_mask:
37342 case CODE_FOR_avx512f_vextracti32x4_mask:
37343 case CODE_FOR_sse2_shufpd:
37344 case CODE_FOR_sse2_shufpd_mask:
37345 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37346 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37347 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37348 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37349 error ("the last argument must be a 2-bit immediate");
37350 return const0_rtx;
37352 case CODE_FOR_avx_vextractf128v4df:
37353 case CODE_FOR_avx_vextractf128v8sf:
37354 case CODE_FOR_avx_vextractf128v8si:
37355 case CODE_FOR_avx_vinsertf128v4df:
37356 case CODE_FOR_avx_vinsertf128v8sf:
37357 case CODE_FOR_avx_vinsertf128v8si:
37358 case CODE_FOR_avx512f_vinsertf64x4_mask:
37359 case CODE_FOR_avx512f_vinserti64x4_mask:
37360 case CODE_FOR_avx512f_vextractf64x4_mask:
37361 case CODE_FOR_avx512f_vextracti64x4_mask:
37362 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37363 case CODE_FOR_avx512dq_vinserti32x8_mask:
37364 case CODE_FOR_avx512vl_vinsertv4df:
37365 case CODE_FOR_avx512vl_vinsertv4di:
37366 case CODE_FOR_avx512vl_vinsertv8sf:
37367 case CODE_FOR_avx512vl_vinsertv8si:
37368 error ("the last argument must be a 1-bit immediate");
37369 return const0_rtx;
37371 case CODE_FOR_avx_vmcmpv2df3:
37372 case CODE_FOR_avx_vmcmpv4sf3:
37373 case CODE_FOR_avx_cmpv2df3:
37374 case CODE_FOR_avx_cmpv4sf3:
37375 case CODE_FOR_avx_cmpv4df3:
37376 case CODE_FOR_avx_cmpv8sf3:
37377 case CODE_FOR_avx512f_cmpv8df3_mask:
37378 case CODE_FOR_avx512f_cmpv16sf3_mask:
37379 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37380 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37381 error ("the last argument must be a 5-bit immediate");
37382 return const0_rtx;
37384 default:
37385 switch (nargs_constant)
37387 case 2:
37388 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
37389 || (!mask_pos && (nargs - i) == nargs_constant))
37391 error ("the next to last argument must be an 8-bit immediate");
37392 break;
37394 case 1:
37395 error ("the last argument must be an 8-bit immediate");
37396 break;
37397 default:
37398 gcc_unreachable ();
37400 return const0_rtx;
37403 else
37405 if (VECTOR_MODE_P (mode))
37406 op = safe_vector_operand (op, mode);
37408 /* If we aren't optimizing, only allow one memory operand to
37409 be generated. */
37410 if (memory_operand (op, mode))
37411 num_memory++;
37413 op = fixup_modeless_constant (op, mode);
37415 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37417 if (optimize || !match || num_memory > 1)
37418 op = copy_to_mode_reg (mode, op);
37420 else
37422 op = copy_to_reg (op);
37423 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37427 args[i].op = op;
37428 args[i].mode = mode;
37431 switch (nargs)
37433 case 1:
37434 pat = GEN_FCN (icode) (real_target, args[0].op);
37435 break;
37436 case 2:
37437 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37438 break;
37439 case 3:
37440 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37441 args[2].op);
37442 break;
37443 case 4:
37444 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37445 args[2].op, args[3].op);
37446 break;
37447 case 5:
37448 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37449 args[2].op, args[3].op, args[4].op);
break;
37450 case 6:
37451 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37452 args[2].op, args[3].op, args[4].op,
37453 args[5].op);
37454 break;
37455 default:
37456 gcc_unreachable ();
37459 if (! pat)
37460 return 0;
37462 emit_insn (pat);
37463 return target;
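/* Illustrative note, not part of the original source: the big switch above
   maps each ix86_builtin_func_type to its operand layout (nargs,
   nargs_constant, mask_pos, rmode), and the operand loop then insists that
   the constant operands really are immediates of the required width.  For
   example V4SF_FTYPE_V4SF_V4SF_INT selects nargs = 3 and nargs_constant = 1,
   so for CODE_FOR_sse4_1_blendps a non-constant last argument is rejected
   with "the last argument must be a 4-bit immediate".  A user-level sketch,
   assuming SSE4.1 and smmintrin.h:

     #include <smmintrin.h>

     __m128 blend_low_pair (__m128 a, __m128 b)
     {
       return _mm_blend_ps (a, b, 0x3);   // mask bit i set picks lane i from b
     }
*/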
37466 /* Transform pattern of following layout:
37467 (parallel [
37468 set (A B)
37469 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37471 into:
37472 (set (A B))
37474 or
37475 (parallel [ A B
37476 ...
37477 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
37478 ...
37479 ])
37480 into:
37481 (parallel [ A B ... ]) */
37483 static rtx
37484 ix86_erase_embedded_rounding (rtx pat)
37486 if (GET_CODE (pat) == INSN)
37487 pat = PATTERN (pat);
37489 gcc_assert (GET_CODE (pat) == PARALLEL);
37491 if (XVECLEN (pat, 0) == 2)
37493 rtx p0 = XVECEXP (pat, 0, 0);
37494 rtx p1 = XVECEXP (pat, 0, 1);
37496 gcc_assert (GET_CODE (p0) == SET
37497 && GET_CODE (p1) == UNSPEC
37498 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37500 return p0;
37502 else
37504 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37505 int i = 0;
37506 int j = 0;
37508 for (; i < XVECLEN (pat, 0); ++i)
37510 rtx elem = XVECEXP (pat, 0, i);
37511 if (GET_CODE (elem) != UNSPEC
37512 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37513 res [j++] = elem;
37516 /* No more than one occurrence was removed. */
37517 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37519 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37523 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37524 with rounding. */
37525 static rtx
37526 ix86_expand_sse_comi_round (const struct builtin_description *d,
37527 tree exp, rtx target)
37529 rtx pat, set_dst;
37530 tree arg0 = CALL_EXPR_ARG (exp, 0);
37531 tree arg1 = CALL_EXPR_ARG (exp, 1);
37532 tree arg2 = CALL_EXPR_ARG (exp, 2);
37533 tree arg3 = CALL_EXPR_ARG (exp, 3);
37534 rtx op0 = expand_normal (arg0);
37535 rtx op1 = expand_normal (arg1);
37536 rtx op2 = expand_normal (arg2);
37537 rtx op3 = expand_normal (arg3);
37538 enum insn_code icode = d->icode;
37539 const struct insn_data_d *insn_p = &insn_data[icode];
37540 machine_mode mode0 = insn_p->operand[0].mode;
37541 machine_mode mode1 = insn_p->operand[1].mode;
37542 enum rtx_code comparison = UNEQ;
37543 bool need_ucomi = false;
37545 /* See avxintrin.h for values. */
37546 enum rtx_code comi_comparisons[32] =
37548 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37549 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37550 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37552 bool need_ucomi_values[32] =
37554 true, false, false, true, true, false, false, true,
37555 true, false, false, true, true, false, false, true,
37556 false, true, true, false, false, true, true, false,
37557 false, true, true, false, false, true, true, false
37560 if (!CONST_INT_P (op2))
37562 error ("the third argument must be a comparison constant");
37563 return const0_rtx;
37565 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37567 error ("incorrect comparison mode");
37568 return const0_rtx;
37571 if (!insn_p->operand[2].predicate (op3, SImode))
37573 error ("incorrect rounding operand");
37574 return const0_rtx;
37577 comparison = comi_comparisons[INTVAL (op2)];
37578 need_ucomi = need_ucomi_values[INTVAL (op2)];
37580 if (VECTOR_MODE_P (mode0))
37581 op0 = safe_vector_operand (op0, mode0);
37582 if (VECTOR_MODE_P (mode1))
37583 op1 = safe_vector_operand (op1, mode1);
37585 target = gen_reg_rtx (SImode);
37586 emit_move_insn (target, const0_rtx);
37587 target = gen_rtx_SUBREG (QImode, target, 0);
37589 if ((optimize && !register_operand (op0, mode0))
37590 || !insn_p->operand[0].predicate (op0, mode0))
37591 op0 = copy_to_mode_reg (mode0, op0);
37592 if ((optimize && !register_operand (op1, mode1))
37593 || !insn_p->operand[1].predicate (op1, mode1))
37594 op1 = copy_to_mode_reg (mode1, op1);
37596 if (need_ucomi)
37597 icode = icode == CODE_FOR_sse_comi_round
37598 ? CODE_FOR_sse_ucomi_round
37599 : CODE_FOR_sse2_ucomi_round;
37601 pat = GEN_FCN (icode) (op0, op1, op3);
37602 if (! pat)
37603 return 0;
37605 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37606 if (INTVAL (op3) == NO_ROUND)
37608 pat = ix86_erase_embedded_rounding (pat);
37609 if (! pat)
37610 return 0;
37612 set_dst = SET_DEST (pat);
37614 else
37616 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37617 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37620 emit_insn (pat);
37621 emit_insn (gen_rtx_SET (VOIDmode,
37622 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37623 gen_rtx_fmt_ee (comparison, QImode,
37624 set_dst,
37625 const0_rtx)));
37627 return SUBREG_REG (target);
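/* Illustrative note, not part of the original source: the two tables above
   are indexed by the third builtin argument, the _CMP_* predicate (0..31,
   see avxintrin.h); they select the RTL comparison code and whether the
   quiet (ucomi) form of the instruction is needed.  The fourth argument is
   the SAE/rounding operand, and when it is NO_ROUND the
   UNSPEC_EMBEDDED_ROUNDING wrapper is erased again.  A user-level sketch,
   assuming AVX-512F and the _mm_comi_round_ss intrinsic from immintrin.h:

     #include <immintrin.h>

     int ge_suppressing_exceptions (__m128 a, __m128 b)
     {
       return _mm_comi_round_ss (a, b, _CMP_GE_OQ, _MM_FROUND_NO_EXC);
     }
*/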
37630 static rtx
37631 ix86_expand_round_builtin (const struct builtin_description *d,
37632 tree exp, rtx target)
37634 rtx pat;
37635 unsigned int i, nargs;
37636 struct
37638 rtx op;
37639 machine_mode mode;
37640 } args[6];
37641 enum insn_code icode = d->icode;
37642 const struct insn_data_d *insn_p = &insn_data[icode];
37643 machine_mode tmode = insn_p->operand[0].mode;
37644 unsigned int nargs_constant = 0;
37645 unsigned int redundant_embed_rnd = 0;
37647 switch ((enum ix86_builtin_func_type) d->flag)
37649 case UINT64_FTYPE_V2DF_INT:
37650 case UINT64_FTYPE_V4SF_INT:
37651 case UINT_FTYPE_V2DF_INT:
37652 case UINT_FTYPE_V4SF_INT:
37653 case INT64_FTYPE_V2DF_INT:
37654 case INT64_FTYPE_V4SF_INT:
37655 case INT_FTYPE_V2DF_INT:
37656 case INT_FTYPE_V4SF_INT:
37657 nargs = 2;
37658 break;
37659 case V4SF_FTYPE_V4SF_UINT_INT:
37660 case V4SF_FTYPE_V4SF_UINT64_INT:
37661 case V2DF_FTYPE_V2DF_UINT64_INT:
37662 case V4SF_FTYPE_V4SF_INT_INT:
37663 case V4SF_FTYPE_V4SF_INT64_INT:
37664 case V2DF_FTYPE_V2DF_INT64_INT:
37665 case V4SF_FTYPE_V4SF_V4SF_INT:
37666 case V2DF_FTYPE_V2DF_V2DF_INT:
37667 case V4SF_FTYPE_V4SF_V2DF_INT:
37668 case V2DF_FTYPE_V2DF_V4SF_INT:
37669 nargs = 3;
37670 break;
37671 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37672 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37673 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37674 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37675 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37676 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37677 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37678 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37679 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37680 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37681 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37682 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37683 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37684 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37685 nargs = 4;
37686 break;
37687 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37688 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37689 nargs_constant = 2;
37690 nargs = 4;
37691 break;
37692 case INT_FTYPE_V4SF_V4SF_INT_INT:
37693 case INT_FTYPE_V2DF_V2DF_INT_INT:
37694 return ix86_expand_sse_comi_round (d, exp, target);
37695 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37696 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37697 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37698 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37699 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37700 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37701 nargs = 5;
37702 break;
37703 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37704 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37705 nargs_constant = 4;
37706 nargs = 5;
37707 break;
37708 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37709 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37710 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37711 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37712 nargs_constant = 3;
37713 nargs = 5;
37714 break;
37715 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37716 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37717 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37718 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37719 nargs = 6;
37720 nargs_constant = 4;
37721 break;
37722 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37723 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37724 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37725 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37726 nargs = 6;
37727 nargs_constant = 3;
37728 break;
37729 default:
37730 gcc_unreachable ();
37732 gcc_assert (nargs <= ARRAY_SIZE (args));
37734 if (optimize
37735 || target == 0
37736 || GET_MODE (target) != tmode
37737 || !insn_p->operand[0].predicate (target, tmode))
37738 target = gen_reg_rtx (tmode);
37740 for (i = 0; i < nargs; i++)
37742 tree arg = CALL_EXPR_ARG (exp, i);
37743 rtx op = expand_normal (arg);
37744 machine_mode mode = insn_p->operand[i + 1].mode;
37745 bool match = insn_p->operand[i + 1].predicate (op, mode);
37747 if (i == nargs - nargs_constant)
37749 if (!match)
37751 switch (icode)
37753 case CODE_FOR_avx512f_getmantv8df_mask_round:
37754 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37755 case CODE_FOR_avx512f_vgetmantv2df_round:
37756 case CODE_FOR_avx512f_vgetmantv4sf_round:
37757 error ("the immediate argument must be a 4-bit immediate");
37758 return const0_rtx;
37759 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37760 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37761 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37762 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37763 error ("the immediate argument must be a 5-bit immediate");
37764 return const0_rtx;
37765 default:
37766 error ("the immediate argument must be an 8-bit immediate");
37767 return const0_rtx;
37771 else if (i == nargs - 1)
37773 if (!insn_p->operand[nargs].predicate (op, SImode))
37775 error ("incorrect rounding operand");
37776 return const0_rtx;
37779 /* If there is no rounding, use the normal version of the pattern. */
37780 if (INTVAL (op) == NO_ROUND)
37781 redundant_embed_rnd = 1;
37783 else
37785 if (VECTOR_MODE_P (mode))
37786 op = safe_vector_operand (op, mode);
37788 op = fixup_modeless_constant (op, mode);
37790 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37792 if (optimize || !match)
37793 op = copy_to_mode_reg (mode, op);
37795 else
37797 op = copy_to_reg (op);
37798 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37802 args[i].op = op;
37803 args[i].mode = mode;
37806 switch (nargs)
37808 case 1:
37809 pat = GEN_FCN (icode) (target, args[0].op);
37810 break;
37811 case 2:
37812 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37813 break;
37814 case 3:
37815 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37816 args[2].op);
37817 break;
37818 case 4:
37819 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37820 args[2].op, args[3].op);
37821 break;
37822 case 5:
37823 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37824 args[2].op, args[3].op, args[4].op);
break;
37825 case 6:
37826 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37827 args[2].op, args[3].op, args[4].op,
37828 args[5].op);
37829 break;
37830 default:
37831 gcc_unreachable ();
37834 if (!pat)
37835 return 0;
37837 if (redundant_embed_rnd)
37838 pat = ix86_erase_embedded_rounding (pat);
37840 emit_insn (pat);
37841 return target;
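/* Illustrative note, not part of the original source: in the loop above the
   last operand of every *_round builtin is the rounding immediate
   (i == nargs - 1); when it is NO_ROUND, redundant_embed_rnd is set and the
   UNSPEC_EMBEDDED_ROUNDING wrapper is stripped so the ordinary pattern is
   matched instead.  A user-level sketch, assuming AVX-512F and the
   _mm_add_round_ss intrinsic from immintrin.h:

     #include <immintrin.h>

     __m128 add_round_to_nearest (__m128 a, __m128 b)
     {
       // an explicit rounding mode keeps the embedded-rounding form
       return _mm_add_round_ss (a, b,
                                _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
     }
*/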
37844 /* Subroutine of ix86_expand_builtin to take care of special insns
37845 with variable number of operands. */
37847 static rtx
37848 ix86_expand_special_args_builtin (const struct builtin_description *d,
37849 tree exp, rtx target)
37851 tree arg;
37852 rtx pat, op;
37853 unsigned int i, nargs, arg_adjust, memory;
37854 bool aligned_mem = false;
37855 struct
37857 rtx op;
37858 machine_mode mode;
37859 } args[3];
37860 enum insn_code icode = d->icode;
37861 bool last_arg_constant = false;
37862 const struct insn_data_d *insn_p = &insn_data[icode];
37863 machine_mode tmode = insn_p->operand[0].mode;
37864 enum { load, store } klass;
37866 switch ((enum ix86_builtin_func_type) d->flag)
37868 case VOID_FTYPE_VOID:
37869 emit_insn (GEN_FCN (icode) (target));
37870 return 0;
37871 case VOID_FTYPE_UINT64:
37872 case VOID_FTYPE_UNSIGNED:
37873 nargs = 0;
37874 klass = store;
37875 memory = 0;
37876 break;
37878 case INT_FTYPE_VOID:
37879 case USHORT_FTYPE_VOID:
37880 case UINT64_FTYPE_VOID:
37881 case UNSIGNED_FTYPE_VOID:
37882 nargs = 0;
37883 klass = load;
37884 memory = 0;
37885 break;
37886 case UINT64_FTYPE_PUNSIGNED:
37887 case V2DI_FTYPE_PV2DI:
37888 case V4DI_FTYPE_PV4DI:
37889 case V32QI_FTYPE_PCCHAR:
37890 case V16QI_FTYPE_PCCHAR:
37891 case V8SF_FTYPE_PCV4SF:
37892 case V8SF_FTYPE_PCFLOAT:
37893 case V4SF_FTYPE_PCFLOAT:
37894 case V4DF_FTYPE_PCV2DF:
37895 case V4DF_FTYPE_PCDOUBLE:
37896 case V2DF_FTYPE_PCDOUBLE:
37897 case VOID_FTYPE_PVOID:
37898 case V16SI_FTYPE_PV4SI:
37899 case V16SF_FTYPE_PV4SF:
37900 case V8DI_FTYPE_PV4DI:
37901 case V8DI_FTYPE_PV8DI:
37902 case V8DF_FTYPE_PV4DF:
37903 nargs = 1;
37904 klass = load;
37905 memory = 0;
37906 switch (icode)
37908 case CODE_FOR_sse4_1_movntdqa:
37909 case CODE_FOR_avx2_movntdqa:
37910 case CODE_FOR_avx512f_movntdqa:
37911 aligned_mem = true;
37912 break;
37913 default:
37914 break;
37916 break;
37917 case VOID_FTYPE_PV2SF_V4SF:
37918 case VOID_FTYPE_PV8DI_V8DI:
37919 case VOID_FTYPE_PV4DI_V4DI:
37920 case VOID_FTYPE_PV2DI_V2DI:
37921 case VOID_FTYPE_PCHAR_V32QI:
37922 case VOID_FTYPE_PCHAR_V16QI:
37923 case VOID_FTYPE_PFLOAT_V16SF:
37924 case VOID_FTYPE_PFLOAT_V8SF:
37925 case VOID_FTYPE_PFLOAT_V4SF:
37926 case VOID_FTYPE_PDOUBLE_V8DF:
37927 case VOID_FTYPE_PDOUBLE_V4DF:
37928 case VOID_FTYPE_PDOUBLE_V2DF:
37929 case VOID_FTYPE_PLONGLONG_LONGLONG:
37930 case VOID_FTYPE_PULONGLONG_ULONGLONG:
37931 case VOID_FTYPE_PINT_INT:
37932 nargs = 1;
37933 klass = store;
37934 /* Reserve memory operand for target. */
37935 memory = ARRAY_SIZE (args);
37936 switch (icode)
37938 /* These builtins and instructions require the memory
37939 to be properly aligned. */
37940 case CODE_FOR_avx_movntv4di:
37941 case CODE_FOR_sse2_movntv2di:
37942 case CODE_FOR_avx_movntv8sf:
37943 case CODE_FOR_sse_movntv4sf:
37944 case CODE_FOR_sse4a_vmmovntv4sf:
37945 case CODE_FOR_avx_movntv4df:
37946 case CODE_FOR_sse2_movntv2df:
37947 case CODE_FOR_sse4a_vmmovntv2df:
37948 case CODE_FOR_sse2_movntidi:
37949 case CODE_FOR_sse_movntq:
37950 case CODE_FOR_sse2_movntisi:
37951 case CODE_FOR_avx512f_movntv16sf:
37952 case CODE_FOR_avx512f_movntv8df:
37953 case CODE_FOR_avx512f_movntv8di:
37954 aligned_mem = true;
37955 break;
37956 default:
37957 break;
37959 break;
37960 case V4SF_FTYPE_V4SF_PCV2SF:
37961 case V2DF_FTYPE_V2DF_PCDOUBLE:
37962 nargs = 2;
37963 klass = load;
37964 memory = 1;
37965 break;
37966 case V8SF_FTYPE_PCV8SF_V8SI:
37967 case V4DF_FTYPE_PCV4DF_V4DI:
37968 case V4SF_FTYPE_PCV4SF_V4SI:
37969 case V2DF_FTYPE_PCV2DF_V2DI:
37970 case V8SI_FTYPE_PCV8SI_V8SI:
37971 case V4DI_FTYPE_PCV4DI_V4DI:
37972 case V4SI_FTYPE_PCV4SI_V4SI:
37973 case V2DI_FTYPE_PCV2DI_V2DI:
37974 nargs = 2;
37975 klass = load;
37976 memory = 0;
37977 break;
37978 case VOID_FTYPE_PV8DF_V8DF_QI:
37979 case VOID_FTYPE_PV16SF_V16SF_HI:
37980 case VOID_FTYPE_PV8DI_V8DI_QI:
37981 case VOID_FTYPE_PV4DI_V4DI_QI:
37982 case VOID_FTYPE_PV2DI_V2DI_QI:
37983 case VOID_FTYPE_PV16SI_V16SI_HI:
37984 case VOID_FTYPE_PV8SI_V8SI_QI:
37985 case VOID_FTYPE_PV4SI_V4SI_QI:
37986 switch (icode)
37988 /* These builtins and instructions require the memory
37989 to be properly aligned. */
37990 case CODE_FOR_avx512f_storev16sf_mask:
37991 case CODE_FOR_avx512f_storev16si_mask:
37992 case CODE_FOR_avx512f_storev8df_mask:
37993 case CODE_FOR_avx512f_storev8di_mask:
37994 case CODE_FOR_avx512vl_storev8sf_mask:
37995 case CODE_FOR_avx512vl_storev8si_mask:
37996 case CODE_FOR_avx512vl_storev4df_mask:
37997 case CODE_FOR_avx512vl_storev4di_mask:
37998 case CODE_FOR_avx512vl_storev4sf_mask:
37999 case CODE_FOR_avx512vl_storev4si_mask:
38000 case CODE_FOR_avx512vl_storev2df_mask:
38001 case CODE_FOR_avx512vl_storev2di_mask:
38002 aligned_mem = true;
38003 break;
38004 default:
38005 break;
38007 /* FALLTHRU */
38008 case VOID_FTYPE_PV8SF_V8SI_V8SF:
38009 case VOID_FTYPE_PV4DF_V4DI_V4DF:
38010 case VOID_FTYPE_PV4SF_V4SI_V4SF:
38011 case VOID_FTYPE_PV2DF_V2DI_V2DF:
38012 case VOID_FTYPE_PV8SI_V8SI_V8SI:
38013 case VOID_FTYPE_PV4DI_V4DI_V4DI:
38014 case VOID_FTYPE_PV4SI_V4SI_V4SI:
38015 case VOID_FTYPE_PV2DI_V2DI_V2DI:
38016 case VOID_FTYPE_PDOUBLE_V2DF_QI:
38017 case VOID_FTYPE_PFLOAT_V4SF_QI:
38018 case VOID_FTYPE_PV8SI_V8DI_QI:
38019 case VOID_FTYPE_PV8HI_V8DI_QI:
38020 case VOID_FTYPE_PV16HI_V16SI_HI:
38021 case VOID_FTYPE_PV16QI_V8DI_QI:
38022 case VOID_FTYPE_PV16QI_V16SI_HI:
38023 case VOID_FTYPE_PV4SI_V4DI_QI:
38024 case VOID_FTYPE_PV4SI_V2DI_QI:
38025 case VOID_FTYPE_PV8HI_V4DI_QI:
38026 case VOID_FTYPE_PV8HI_V2DI_QI:
38027 case VOID_FTYPE_PV8HI_V8SI_QI:
38028 case VOID_FTYPE_PV8HI_V4SI_QI:
38029 case VOID_FTYPE_PV16QI_V4DI_QI:
38030 case VOID_FTYPE_PV16QI_V2DI_QI:
38031 case VOID_FTYPE_PV16QI_V8SI_QI:
38032 case VOID_FTYPE_PV16QI_V4SI_QI:
38033 case VOID_FTYPE_PV8HI_V8HI_QI:
38034 case VOID_FTYPE_PV16HI_V16HI_HI:
38035 case VOID_FTYPE_PV32HI_V32HI_SI:
38036 case VOID_FTYPE_PV16QI_V16QI_HI:
38037 case VOID_FTYPE_PV32QI_V32QI_SI:
38038 case VOID_FTYPE_PV64QI_V64QI_DI:
38039 case VOID_FTYPE_PV4DF_V4DF_QI:
38040 case VOID_FTYPE_PV2DF_V2DF_QI:
38041 case VOID_FTYPE_PV8SF_V8SF_QI:
38042 case VOID_FTYPE_PV4SF_V4SF_QI:
38043 nargs = 2;
38044 klass = store;
38045 /* Reserve memory operand for target. */
38046 memory = ARRAY_SIZE (args);
38047 break;
38048 case V4SF_FTYPE_PCV4SF_V4SF_QI:
38049 case V8SF_FTYPE_PCV8SF_V8SF_QI:
38050 case V16SF_FTYPE_PCV16SF_V16SF_HI:
38051 case V4SI_FTYPE_PCV4SI_V4SI_QI:
38052 case V8SI_FTYPE_PCV8SI_V8SI_QI:
38053 case V16SI_FTYPE_PCV16SI_V16SI_HI:
38054 case V2DF_FTYPE_PCV2DF_V2DF_QI:
38055 case V4DF_FTYPE_PCV4DF_V4DF_QI:
38056 case V8DF_FTYPE_PCV8DF_V8DF_QI:
38057 case V2DI_FTYPE_PCV2DI_V2DI_QI:
38058 case V4DI_FTYPE_PCV4DI_V4DI_QI:
38059 case V8DI_FTYPE_PCV8DI_V8DI_QI:
38060 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
38061 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
38062 case V8HI_FTYPE_PCV8HI_V8HI_QI:
38063 case V16HI_FTYPE_PCV16HI_V16HI_HI:
38064 case V32HI_FTYPE_PCV32HI_V32HI_SI:
38065 case V16QI_FTYPE_PCV16QI_V16QI_HI:
38066 case V32QI_FTYPE_PCV32QI_V32QI_SI:
38067 case V64QI_FTYPE_PCV64QI_V64QI_DI:
38068 nargs = 3;
38069 klass = load;
38070 memory = 0;
38071 switch (icode)
38073 /* These builtins and instructions require the memory
38074 to be properly aligned. */
38075 case CODE_FOR_avx512f_loadv16sf_mask:
38076 case CODE_FOR_avx512f_loadv16si_mask:
38077 case CODE_FOR_avx512f_loadv8df_mask:
38078 case CODE_FOR_avx512f_loadv8di_mask:
38079 case CODE_FOR_avx512vl_loadv8sf_mask:
38080 case CODE_FOR_avx512vl_loadv8si_mask:
38081 case CODE_FOR_avx512vl_loadv4df_mask:
38082 case CODE_FOR_avx512vl_loadv4di_mask:
38083 case CODE_FOR_avx512vl_loadv4sf_mask:
38084 case CODE_FOR_avx512vl_loadv4si_mask:
38085 case CODE_FOR_avx512vl_loadv2df_mask:
38086 case CODE_FOR_avx512vl_loadv2di_mask:
38087 case CODE_FOR_avx512bw_loadv64qi_mask:
38088 case CODE_FOR_avx512vl_loadv32qi_mask:
38089 case CODE_FOR_avx512vl_loadv16qi_mask:
38090 case CODE_FOR_avx512bw_loadv32hi_mask:
38091 case CODE_FOR_avx512vl_loadv16hi_mask:
38092 case CODE_FOR_avx512vl_loadv8hi_mask:
38093 aligned_mem = true;
38094 break;
38095 default:
38096 break;
38098 break;
38099 case VOID_FTYPE_UINT_UINT_UINT:
38100 case VOID_FTYPE_UINT64_UINT_UINT:
38101 case UCHAR_FTYPE_UINT_UINT_UINT:
38102 case UCHAR_FTYPE_UINT64_UINT_UINT:
38103 nargs = 3;
38104 klass = load;
38105 memory = ARRAY_SIZE (args);
38106 last_arg_constant = true;
38107 break;
38108 default:
38109 gcc_unreachable ();
38112 gcc_assert (nargs <= ARRAY_SIZE (args));
38114 if (klass == store)
38116 arg = CALL_EXPR_ARG (exp, 0);
38117 op = expand_normal (arg);
38118 gcc_assert (target == 0);
38119 if (memory)
38121 op = ix86_zero_extend_to_Pmode (op);
38122 target = gen_rtx_MEM (tmode, op);
38123 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38124 on it. Try to improve it using get_pointer_alignment,
38125 and if the special builtin is one that requires strict
38126 mode alignment, also from its GET_MODE_ALIGNMENT.
38127 Failure to do so could lead to ix86_legitimate_combined_insn
38128 rejecting all changes to such insns. */
38129 unsigned int align = get_pointer_alignment (arg);
38130 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38131 align = GET_MODE_ALIGNMENT (tmode);
38132 if (MEM_ALIGN (target) < align)
38133 set_mem_align (target, align);
38135 else
38136 target = force_reg (tmode, op);
38137 arg_adjust = 1;
38139 else
38141 arg_adjust = 0;
38142 if (optimize
38143 || target == 0
38144 || !register_operand (target, tmode)
38145 || GET_MODE (target) != tmode)
38146 target = gen_reg_rtx (tmode);
38149 for (i = 0; i < nargs; i++)
38151 machine_mode mode = insn_p->operand[i + 1].mode;
38152 bool match;
38154 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38155 op = expand_normal (arg);
38156 match = insn_p->operand[i + 1].predicate (op, mode);
38158 if (last_arg_constant && (i + 1) == nargs)
38160 if (!match)
38162 if (icode == CODE_FOR_lwp_lwpvalsi3
38163 || icode == CODE_FOR_lwp_lwpinssi3
38164 || icode == CODE_FOR_lwp_lwpvaldi3
38165 || icode == CODE_FOR_lwp_lwpinsdi3)
38166 error ("the last argument must be a 32-bit immediate");
38167 else
38168 error ("the last argument must be an 8-bit immediate");
38169 return const0_rtx;
38172 else
38174 if (i == memory)
38176 /* This must be the memory operand. */
38177 op = ix86_zero_extend_to_Pmode (op);
38178 op = gen_rtx_MEM (mode, op);
38179 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38180 on it. Try to improve it using get_pointer_alignment,
38181 and if the special builtin is one that requires strict
38182 mode alignment, also from its GET_MODE_ALIGNMENT.
38183 Failure to do so could lead to ix86_legitimate_combined_insn
38184 rejecting all changes to such insns. */
38185 unsigned int align = get_pointer_alignment (arg);
38186 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38187 align = GET_MODE_ALIGNMENT (mode);
38188 if (MEM_ALIGN (op) < align)
38189 set_mem_align (op, align);
38191 else
38193 /* This must be a register. */
38194 if (VECTOR_MODE_P (mode))
38195 op = safe_vector_operand (op, mode);
38197 op = fixup_modeless_constant (op, mode);
38199 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38200 op = copy_to_mode_reg (mode, op);
38201 else
38203 op = copy_to_reg (op);
38204 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38209 args[i].op = op;
38210 args[i].mode = mode;
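/* All operands have been prepared; emit the insn pattern that matches
   the number of arguments collected above.  */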
38213 switch (nargs)
38215 case 0:
38216 pat = GEN_FCN (icode) (target);
38217 break;
38218 case 1:
38219 pat = GEN_FCN (icode) (target, args[0].op);
38220 break;
38221 case 2:
38222 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38223 break;
38224 case 3:
38225 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38226 break;
38227 default:
38228 gcc_unreachable ();
38231 if (! pat)
38232 return 0;
38233 emit_insn (pat);
38234 return klass == store ? 0 : target;
38237 /* Return the integer constant in ARG. Constrain it to be in the range
38238 of the subparts of VEC_TYPE; issue an error if not. */
38240 static int
38241 get_element_number (tree vec_type, tree arg)
38243 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
38245 if (!tree_fits_uhwi_p (arg)
38246 || (elt = tree_to_uhwi (arg), elt > max))
38248 error ("selector must be an integer constant in the range 0..%wi", max);
38249 return 0;
38252 return elt;
38255 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38256 ix86_expand_vector_init. We DO have language-level syntax for this, in
38257 the form of (type){ init-list }. Except that since we can't place emms
38258 instructions from inside the compiler, we can't allow the use of MMX
38259 registers unless the user explicitly asks for it. So we do *not* define
38260 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38261 we have builtins invoked by mmintrin.h that give us license to emit
38262 these sorts of instructions. */
38264 static rtx
38265 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38267 machine_mode tmode = TYPE_MODE (type);
38268 machine_mode inner_mode = GET_MODE_INNER (tmode);
38269 int i, n_elt = GET_MODE_NUNITS (tmode);
38270 rtvec v = rtvec_alloc (n_elt);
38272 gcc_assert (VECTOR_MODE_P (tmode));
38273 gcc_assert (call_expr_nargs (exp) == n_elt);
38275 for (i = 0; i < n_elt; ++i)
38277 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38278 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38281 if (!target || !register_operand (target, tmode))
38282 target = gen_reg_rtx (tmode);
38284 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38285 return target;
38288 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38289 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38290 had a language-level syntax for referencing vector elements. */
38292 static rtx
38293 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38295 machine_mode tmode, mode0;
38296 tree arg0, arg1;
38297 int elt;
38298 rtx op0;
38300 arg0 = CALL_EXPR_ARG (exp, 0);
38301 arg1 = CALL_EXPR_ARG (exp, 1);
38303 op0 = expand_normal (arg0);
38304 elt = get_element_number (TREE_TYPE (arg0), arg1);
38306 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38307 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38308 gcc_assert (VECTOR_MODE_P (mode0));
38310 op0 = force_reg (mode0, op0);
38312 if (optimize || !target || !register_operand (target, tmode))
38313 target = gen_reg_rtx (tmode);
38315 ix86_expand_vector_extract (true, target, op0, elt);
38317 return target;
38320 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38321 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38322 a language-level syntax for referencing vector elements. */
38324 static rtx
38325 ix86_expand_vec_set_builtin (tree exp)
38327 machine_mode tmode, mode1;
38328 tree arg0, arg1, arg2;
38329 int elt;
38330 rtx op0, op1, target;
38332 arg0 = CALL_EXPR_ARG (exp, 0);
38333 arg1 = CALL_EXPR_ARG (exp, 1);
38334 arg2 = CALL_EXPR_ARG (exp, 2);
38336 tmode = TYPE_MODE (TREE_TYPE (arg0));
38337 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38338 gcc_assert (VECTOR_MODE_P (tmode));
38340 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38341 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38342 elt = get_element_number (TREE_TYPE (arg0), arg2);
38344 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38345 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38347 op0 = force_reg (tmode, op0);
38348 op1 = force_reg (mode1, op1);
38350 /* OP0 is the source of these builtin functions and shouldn't be
38351 modified. Create a copy, use it and return it as target. */
38352 target = gen_reg_rtx (tmode);
38353 emit_move_insn (target, op0);
38354 ix86_expand_vector_set (true, target, op1, elt);
38356 return target;
38359 /* Emit conditional move of SRC to DST with condition
38360 OP1 CODE OP2. */
38361 static void
38362 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38364 rtx t;
38366 if (TARGET_CMOVE)
38368 t = ix86_expand_compare (code, op1, op2);
38369 emit_insn (gen_rtx_SET (VOIDmode, dst,
38370 gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38371 src, dst)));
38373 else
38375 rtx nomove = gen_label_rtx ();
38376 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38377 const0_rtx, GET_MODE (op1), 1, nomove);
38378 emit_move_insn (dst, src);
38379 emit_label (nomove);
38383 /* Choose max of DST and SRC and put it to DST. */
38384 static void
38385 ix86_emit_move_max (rtx dst, rtx src)
38387 ix86_emit_cmove (dst, src, LTU, dst, src);
38390 /* Expand an expression EXP that calls a built-in function,
38391 with result going to TARGET if that's convenient
38392 (and in mode MODE if that's convenient).
38393 SUBTARGET may be used as the target for computing one of EXP's operands.
38394 IGNORE is nonzero if the value is to be ignored. */
38396 static rtx
38397 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38398 machine_mode mode, int ignore)
38400 const struct builtin_description *d;
38401 size_t i;
38402 enum insn_code icode;
38403 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38404 tree arg0, arg1, arg2, arg3, arg4;
38405 rtx op0, op1, op2, op3, op4, pat, insn;
38406 machine_mode mode0, mode1, mode2, mode3, mode4;
38407 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38409 /* For CPU builtins that can be folded, fold first and expand the fold. */
38410 switch (fcode)
38412 case IX86_BUILTIN_CPU_INIT:
38414 /* Make it call __cpu_indicator_init in libgcc. */
38415 tree call_expr, fndecl, type;
38416 type = build_function_type_list (integer_type_node, NULL_TREE);
38417 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38418 call_expr = build_call_expr (fndecl, 0);
38419 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38421 case IX86_BUILTIN_CPU_IS:
38422 case IX86_BUILTIN_CPU_SUPPORTS:
38424 tree arg0 = CALL_EXPR_ARG (exp, 0);
38425 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38426 gcc_assert (fold_expr != NULL_TREE);
38427 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38431 /* Determine whether the builtin function is available under the current ISA.
38432 Originally the builtin was not created if it wasn't applicable to the
38433 current ISA based on the command line switches. With function specific
38434 options, we need to check in the context of the function making the call
38435 whether it is supported. */
38436 if (ix86_builtins_isa[fcode].isa
38437 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38439 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38440 NULL, (enum fpmath_unit) 0, false);
38442 if (!opts)
38443 error ("%qE needs unknown isa option", fndecl);
38444 else
38446 gcc_assert (opts != NULL);
38447 error ("%qE needs isa option %s", fndecl, opts);
38448 free (opts);
38450 return const0_rtx;
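/* Builtins that need special expansion are handled explicitly in the
   switch below; anything not listed falls through to the generic
   descriptor tables at the end of this function.  */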
38453 switch (fcode)
38455 case IX86_BUILTIN_BNDMK:
38456 if (!target
38457 || GET_MODE (target) != BNDmode
38458 || !register_operand (target, BNDmode))
38459 target = gen_reg_rtx (BNDmode);
38461 arg0 = CALL_EXPR_ARG (exp, 0);
38462 arg1 = CALL_EXPR_ARG (exp, 1);
38464 op0 = expand_normal (arg0);
38465 op1 = expand_normal (arg1);
38467 if (!register_operand (op0, Pmode))
38468 op0 = ix86_zero_extend_to_Pmode (op0);
38469 if (!register_operand (op1, Pmode))
38470 op1 = ix86_zero_extend_to_Pmode (op1);
38472 /* Builtin arg1 is the size of the block, but instruction op1 should
38473 be (size - 1). */
38474 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38475 NULL_RTX, 1, OPTAB_DIRECT);
38477 emit_insn (BNDmode == BND64mode
38478 ? gen_bnd64_mk (target, op0, op1)
38479 : gen_bnd32_mk (target, op0, op1));
38480 return target;
38482 case IX86_BUILTIN_BNDSTX:
38483 arg0 = CALL_EXPR_ARG (exp, 0);
38484 arg1 = CALL_EXPR_ARG (exp, 1);
38485 arg2 = CALL_EXPR_ARG (exp, 2);
38487 op0 = expand_normal (arg0);
38488 op1 = expand_normal (arg1);
38489 op2 = expand_normal (arg2);
38491 if (!register_operand (op0, Pmode))
38492 op0 = ix86_zero_extend_to_Pmode (op0);
38493 if (!register_operand (op1, BNDmode))
38494 op1 = copy_to_mode_reg (BNDmode, op1);
38495 if (!register_operand (op2, Pmode))
38496 op2 = ix86_zero_extend_to_Pmode (op2);
38498 emit_insn (BNDmode == BND64mode
38499 ? gen_bnd64_stx (op2, op0, op1)
38500 : gen_bnd32_stx (op2, op0, op1));
38501 return 0;
38503 case IX86_BUILTIN_BNDLDX:
38504 if (!target
38505 || GET_MODE (target) != BNDmode
38506 || !register_operand (target, BNDmode))
38507 target = gen_reg_rtx (BNDmode);
38509 arg0 = CALL_EXPR_ARG (exp, 0);
38510 arg1 = CALL_EXPR_ARG (exp, 1);
38512 op0 = expand_normal (arg0);
38513 op1 = expand_normal (arg1);
38515 if (!register_operand (op0, Pmode))
38516 op0 = ix86_zero_extend_to_Pmode (op0);
38517 if (!register_operand (op1, Pmode))
38518 op1 = ix86_zero_extend_to_Pmode (op1);
38520 emit_insn (BNDmode == BND64mode
38521 ? gen_bnd64_ldx (target, op0, op1)
38522 : gen_bnd32_ldx (target, op0, op1));
38523 return target;
38525 case IX86_BUILTIN_BNDCL:
38526 arg0 = CALL_EXPR_ARG (exp, 0);
38527 arg1 = CALL_EXPR_ARG (exp, 1);
38529 op0 = expand_normal (arg0);
38530 op1 = expand_normal (arg1);
38532 if (!register_operand (op0, Pmode))
38533 op0 = ix86_zero_extend_to_Pmode (op0);
38534 if (!register_operand (op1, BNDmode))
38535 op1 = copy_to_mode_reg (BNDmode, op1);
38537 emit_insn (BNDmode == BND64mode
38538 ? gen_bnd64_cl (op1, op0)
38539 : gen_bnd32_cl (op1, op0));
38540 return 0;
38542 case IX86_BUILTIN_BNDCU:
38543 arg0 = CALL_EXPR_ARG (exp, 0);
38544 arg1 = CALL_EXPR_ARG (exp, 1);
38546 op0 = expand_normal (arg0);
38547 op1 = expand_normal (arg1);
38549 if (!register_operand (op0, Pmode))
38550 op0 = ix86_zero_extend_to_Pmode (op0);
38551 if (!register_operand (op1, BNDmode))
38552 op1 = copy_to_mode_reg (BNDmode, op1);
38554 emit_insn (BNDmode == BND64mode
38555 ? gen_bnd64_cu (op1, op0)
38556 : gen_bnd32_cu (op1, op0));
38557 return 0;
38559 case IX86_BUILTIN_BNDRET:
38560 arg0 = CALL_EXPR_ARG (exp, 0);
38561 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38562 target = chkp_get_rtl_bounds (arg0);
38564 /* If no bounds were specified for the returned value,
38565 then use INIT bounds. This usually happens when
38566 some built-in function is expanded. */
38567 if (!target)
38569 rtx t1 = gen_reg_rtx (Pmode);
38570 rtx t2 = gen_reg_rtx (Pmode);
38571 target = gen_reg_rtx (BNDmode);
38572 emit_move_insn (t1, const0_rtx);
38573 emit_move_insn (t2, constm1_rtx);
38574 emit_insn (BNDmode == BND64mode
38575 ? gen_bnd64_mk (target, t1, t2)
38576 : gen_bnd32_mk (target, t1, t2));
38579 gcc_assert (target && REG_P (target));
38580 return target;
38582 case IX86_BUILTIN_BNDNARROW:
38584 rtx m1, m1h1, m1h2, lb, ub, t1;
38586 /* Return value and lb. */
38587 arg0 = CALL_EXPR_ARG (exp, 0);
38588 /* Bounds. */
38589 arg1 = CALL_EXPR_ARG (exp, 1);
38590 /* Size. */
38591 arg2 = CALL_EXPR_ARG (exp, 2);
38593 lb = expand_normal (arg0);
38594 op1 = expand_normal (arg1);
38595 op2 = expand_normal (arg2);
38597 /* The size was passed, but we need to use (size - 1), as for bndmk. */
38598 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38599 NULL_RTX, 1, OPTAB_DIRECT);
38601 /* Add LB to the size and invert to get UB. */
38602 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38603 op2, 1, OPTAB_DIRECT);
38604 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
38606 if (!register_operand (lb, Pmode))
38607 lb = ix86_zero_extend_to_Pmode (lb);
38608 if (!register_operand (ub, Pmode))
38609 ub = ix86_zero_extend_to_Pmode (ub);
38611 /* We need to move bounds to memory before any computations. */
38612 if (MEM_P (op1))
38613 m1 = op1;
38614 else
38616 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38617 emit_move_insn (m1, op1);
38620 /* Generate mem expression to be used for access to LB and UB. */
38621 m1h1 = adjust_address (m1, Pmode, 0);
38622 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38624 t1 = gen_reg_rtx (Pmode);
38626 /* Compute LB. */
38627 emit_move_insn (t1, m1h1);
38628 ix86_emit_move_max (t1, lb);
38629 emit_move_insn (m1h1, t1);
38631 /* Compute UB. UB is stored in 1's complement form. Therefore
38632 we also use max here. */
38633 emit_move_insn (t1, m1h2);
38634 ix86_emit_move_max (t1, ub);
38635 emit_move_insn (m1h2, t1);
38637 op2 = gen_reg_rtx (BNDmode);
38638 emit_move_insn (op2, m1);
38640 return chkp_join_splitted_slot (lb, op2);
38643 case IX86_BUILTIN_BNDINT:
38645 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38647 if (!target
38648 || GET_MODE (target) != BNDmode
38649 || !register_operand (target, BNDmode))
38650 target = gen_reg_rtx (BNDmode);
38652 arg0 = CALL_EXPR_ARG (exp, 0);
38653 arg1 = CALL_EXPR_ARG (exp, 1);
38655 op0 = expand_normal (arg0);
38656 op1 = expand_normal (arg1);
38658 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38659 rh1 = adjust_address (res, Pmode, 0);
38660 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38662 /* Put the first bounds into temporaries. */
38663 lb1 = gen_reg_rtx (Pmode);
38664 ub1 = gen_reg_rtx (Pmode);
38665 if (MEM_P (op0))
38667 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38668 emit_move_insn (ub1, adjust_address (op0, Pmode,
38669 GET_MODE_SIZE (Pmode)));
38671 else
38673 emit_move_insn (res, op0);
38674 emit_move_insn (lb1, rh1);
38675 emit_move_insn (ub1, rh2);
38678 /* Put the second bounds into temporaries. */
38679 lb2 = gen_reg_rtx (Pmode);
38680 ub2 = gen_reg_rtx (Pmode);
38681 if (MEM_P (op1))
38683 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38684 emit_move_insn (ub2, adjust_address (op1, Pmode,
38685 GET_MODE_SIZE (Pmode)));
38687 else
38689 emit_move_insn (res, op1);
38690 emit_move_insn (lb2, rh1);
38691 emit_move_insn (ub2, rh2);
38694 /* Compute LB. */
38695 ix86_emit_move_max (lb1, lb2);
38696 emit_move_insn (rh1, lb1);
38698 /* Compute UB. UB is stored in 1's complement form. Therefore
38699 we also use max here. */
38700 ix86_emit_move_max (ub1, ub2);
38701 emit_move_insn (rh2, ub1);
38703 emit_move_insn (target, res);
38705 return target;
38708 case IX86_BUILTIN_SIZEOF:
38710 tree name;
38711 rtx symbol;
38713 if (!target
38714 || GET_MODE (target) != Pmode
38715 || !register_operand (target, Pmode))
38716 target = gen_reg_rtx (Pmode);
38718 arg0 = CALL_EXPR_ARG (exp, 0);
38719 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38721 name = DECL_ASSEMBLER_NAME (arg0);
38722 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38724 emit_insn (Pmode == SImode
38725 ? gen_move_size_reloc_si (target, symbol)
38726 : gen_move_size_reloc_di (target, symbol));
38728 return target;
38731 case IX86_BUILTIN_BNDLOWER:
38733 rtx mem, hmem;
38735 if (!target
38736 || GET_MODE (target) != Pmode
38737 || !register_operand (target, Pmode))
38738 target = gen_reg_rtx (Pmode);
38740 arg0 = CALL_EXPR_ARG (exp, 0);
38741 op0 = expand_normal (arg0);
38743 /* We need to move bounds to memory first. */
38744 if (MEM_P (op0))
38745 mem = op0;
38746 else
38748 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38749 emit_move_insn (mem, op0);
38752 /* Generate mem expression to access LB and load it. */
38753 hmem = adjust_address (mem, Pmode, 0);
38754 emit_move_insn (target, hmem);
38756 return target;
38759 case IX86_BUILTIN_BNDUPPER:
38761 rtx mem, hmem, res;
38763 if (!target
38764 || GET_MODE (target) != Pmode
38765 || !register_operand (target, Pmode))
38766 target = gen_reg_rtx (Pmode);
38768 arg0 = CALL_EXPR_ARG (exp, 0);
38769 op0 = expand_normal (arg0);
38771 /* We need to move bounds to memory first. */
38772 if (MEM_P (op0))
38773 mem = op0;
38774 else
38776 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38777 emit_move_insn (mem, op0);
38780 /* Generate mem expression to access UB. */
38781 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
38783 /* We need to invert all bits of UB. */
38784 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
38786 if (res != target)
38787 emit_move_insn (target, res);
38789 return target;
38792 case IX86_BUILTIN_MASKMOVQ:
38793 case IX86_BUILTIN_MASKMOVDQU:
38794 icode = (fcode == IX86_BUILTIN_MASKMOVQ
38795 ? CODE_FOR_mmx_maskmovq
38796 : CODE_FOR_sse2_maskmovdqu);
38797 /* Note the arg order is different from the operand order. */
38798 arg1 = CALL_EXPR_ARG (exp, 0);
38799 arg2 = CALL_EXPR_ARG (exp, 1);
38800 arg0 = CALL_EXPR_ARG (exp, 2);
38801 op0 = expand_normal (arg0);
38802 op1 = expand_normal (arg1);
38803 op2 = expand_normal (arg2);
38804 mode0 = insn_data[icode].operand[0].mode;
38805 mode1 = insn_data[icode].operand[1].mode;
38806 mode2 = insn_data[icode].operand[2].mode;
38808 op0 = ix86_zero_extend_to_Pmode (op0);
38809 op0 = gen_rtx_MEM (mode1, op0);
38811 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38812 op0 = copy_to_mode_reg (mode0, op0);
38813 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38814 op1 = copy_to_mode_reg (mode1, op1);
38815 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38816 op2 = copy_to_mode_reg (mode2, op2);
38817 pat = GEN_FCN (icode) (op0, op1, op2);
38818 if (! pat)
38819 return 0;
38820 emit_insn (pat);
38821 return 0;
38823 case IX86_BUILTIN_LDMXCSR:
38824 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
38825 target = assign_386_stack_local (SImode, SLOT_TEMP);
38826 emit_move_insn (target, op0);
38827 emit_insn (gen_sse_ldmxcsr (target));
38828 return 0;
38830 case IX86_BUILTIN_STMXCSR:
38831 target = assign_386_stack_local (SImode, SLOT_TEMP);
38832 emit_insn (gen_sse_stmxcsr (target));
38833 return copy_to_mode_reg (SImode, target);
38835 case IX86_BUILTIN_CLFLUSH:
38836 arg0 = CALL_EXPR_ARG (exp, 0);
38837 op0 = expand_normal (arg0);
38838 icode = CODE_FOR_sse2_clflush;
38839 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38840 op0 = ix86_zero_extend_to_Pmode (op0);
38842 emit_insn (gen_sse2_clflush (op0));
38843 return 0;
38845 case IX86_BUILTIN_CLWB:
38846 arg0 = CALL_EXPR_ARG (exp, 0);
38847 op0 = expand_normal (arg0);
38848 icode = CODE_FOR_clwb;
38849 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38850 op0 = ix86_zero_extend_to_Pmode (op0);
38852 emit_insn (gen_clwb (op0));
38853 return 0;
38855 case IX86_BUILTIN_CLFLUSHOPT:
38856 arg0 = CALL_EXPR_ARG (exp, 0);
38857 op0 = expand_normal (arg0);
38858 icode = CODE_FOR_clflushopt;
38859 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38860 op0 = ix86_zero_extend_to_Pmode (op0);
38862 emit_insn (gen_clflushopt (op0));
38863 return 0;
38865 case IX86_BUILTIN_MONITOR:
38866 arg0 = CALL_EXPR_ARG (exp, 0);
38867 arg1 = CALL_EXPR_ARG (exp, 1);
38868 arg2 = CALL_EXPR_ARG (exp, 2);
38869 op0 = expand_normal (arg0);
38870 op1 = expand_normal (arg1);
38871 op2 = expand_normal (arg2);
38872 if (!REG_P (op0))
38873 op0 = ix86_zero_extend_to_Pmode (op0);
38874 if (!REG_P (op1))
38875 op1 = copy_to_mode_reg (SImode, op1);
38876 if (!REG_P (op2))
38877 op2 = copy_to_mode_reg (SImode, op2);
38878 emit_insn (ix86_gen_monitor (op0, op1, op2));
38879 return 0;
38881 case IX86_BUILTIN_MWAIT:
38882 arg0 = CALL_EXPR_ARG (exp, 0);
38883 arg1 = CALL_EXPR_ARG (exp, 1);
38884 op0 = expand_normal (arg0);
38885 op1 = expand_normal (arg1);
38886 if (!REG_P (op0))
38887 op0 = copy_to_mode_reg (SImode, op0);
38888 if (!REG_P (op1))
38889 op1 = copy_to_mode_reg (SImode, op1);
38890 emit_insn (gen_sse3_mwait (op0, op1));
38891 return 0;
38893 case IX86_BUILTIN_VEC_INIT_V2SI:
38894 case IX86_BUILTIN_VEC_INIT_V4HI:
38895 case IX86_BUILTIN_VEC_INIT_V8QI:
38896 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
38898 case IX86_BUILTIN_VEC_EXT_V2DF:
38899 case IX86_BUILTIN_VEC_EXT_V2DI:
38900 case IX86_BUILTIN_VEC_EXT_V4SF:
38901 case IX86_BUILTIN_VEC_EXT_V4SI:
38902 case IX86_BUILTIN_VEC_EXT_V8HI:
38903 case IX86_BUILTIN_VEC_EXT_V2SI:
38904 case IX86_BUILTIN_VEC_EXT_V4HI:
38905 case IX86_BUILTIN_VEC_EXT_V16QI:
38906 return ix86_expand_vec_ext_builtin (exp, target);
38908 case IX86_BUILTIN_VEC_SET_V2DI:
38909 case IX86_BUILTIN_VEC_SET_V4SF:
38910 case IX86_BUILTIN_VEC_SET_V4SI:
38911 case IX86_BUILTIN_VEC_SET_V8HI:
38912 case IX86_BUILTIN_VEC_SET_V4HI:
38913 case IX86_BUILTIN_VEC_SET_V16QI:
38914 return ix86_expand_vec_set_builtin (exp);
38916 case IX86_BUILTIN_INFQ:
38917 case IX86_BUILTIN_HUGE_VALQ:
38919 REAL_VALUE_TYPE inf;
38920 rtx tmp;
38922 real_inf (&inf);
38923 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
38925 tmp = validize_mem (force_const_mem (mode, tmp));
38927 if (target == 0)
38928 target = gen_reg_rtx (mode);
38930 emit_move_insn (target, tmp);
38931 return target;
38934 case IX86_BUILTIN_RDPMC:
38935 case IX86_BUILTIN_RDTSC:
38936 case IX86_BUILTIN_RDTSCP:
38938 op0 = gen_reg_rtx (DImode);
38939 op1 = gen_reg_rtx (DImode);
38941 if (fcode == IX86_BUILTIN_RDPMC)
38943 arg0 = CALL_EXPR_ARG (exp, 0);
38944 op2 = expand_normal (arg0);
38945 if (!register_operand (op2, SImode))
38946 op2 = copy_to_mode_reg (SImode, op2);
38948 insn = (TARGET_64BIT
38949 ? gen_rdpmc_rex64 (op0, op1, op2)
38950 : gen_rdpmc (op0, op2));
38951 emit_insn (insn);
38953 else if (fcode == IX86_BUILTIN_RDTSC)
38955 insn = (TARGET_64BIT
38956 ? gen_rdtsc_rex64 (op0, op1)
38957 : gen_rdtsc (op0));
38958 emit_insn (insn);
38960 else
38962 op2 = gen_reg_rtx (SImode);
38964 insn = (TARGET_64BIT
38965 ? gen_rdtscp_rex64 (op0, op1, op2)
38966 : gen_rdtscp (op0, op2));
38967 emit_insn (insn);
38969 arg0 = CALL_EXPR_ARG (exp, 0);
38970 op4 = expand_normal (arg0);
38971 if (!address_operand (op4, VOIDmode))
38973 op4 = convert_memory_address (Pmode, op4);
38974 op4 = copy_addr_to_reg (op4);
38976 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
38979 if (target == 0)
38981 /* mode is VOIDmode if __builtin_rd* has been called
38982 without an lhs. */
38983 if (mode == VOIDmode)
38984 return target;
38985 target = gen_reg_rtx (mode);
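/* On 64-bit targets the result comes back as two DImode halves; shift
   the high half into place and combine it with the low half.  */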
38988 if (TARGET_64BIT)
38990 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
38991 op1, 1, OPTAB_DIRECT);
38992 op0 = expand_simple_binop (DImode, IOR, op0, op1,
38993 op0, 1, OPTAB_DIRECT);
38996 emit_move_insn (target, op0);
38997 return target;
38999 case IX86_BUILTIN_FXSAVE:
39000 case IX86_BUILTIN_FXRSTOR:
39001 case IX86_BUILTIN_FXSAVE64:
39002 case IX86_BUILTIN_FXRSTOR64:
39003 case IX86_BUILTIN_FNSTENV:
39004 case IX86_BUILTIN_FLDENV:
39005 mode0 = BLKmode;
39006 switch (fcode)
39008 case IX86_BUILTIN_FXSAVE:
39009 icode = CODE_FOR_fxsave;
39010 break;
39011 case IX86_BUILTIN_FXRSTOR:
39012 icode = CODE_FOR_fxrstor;
39013 break;
39014 case IX86_BUILTIN_FXSAVE64:
39015 icode = CODE_FOR_fxsave64;
39016 break;
39017 case IX86_BUILTIN_FXRSTOR64:
39018 icode = CODE_FOR_fxrstor64;
39019 break;
39020 case IX86_BUILTIN_FNSTENV:
39021 icode = CODE_FOR_fnstenv;
39022 break;
39023 case IX86_BUILTIN_FLDENV:
39024 icode = CODE_FOR_fldenv;
39025 break;
39026 default:
39027 gcc_unreachable ();
39030 arg0 = CALL_EXPR_ARG (exp, 0);
39031 op0 = expand_normal (arg0);
39033 if (!address_operand (op0, VOIDmode))
39035 op0 = convert_memory_address (Pmode, op0);
39036 op0 = copy_addr_to_reg (op0);
39038 op0 = gen_rtx_MEM (mode0, op0);
39040 pat = GEN_FCN (icode) (op0);
39041 if (pat)
39042 emit_insn (pat);
39043 return 0;
39045 case IX86_BUILTIN_XSAVE:
39046 case IX86_BUILTIN_XRSTOR:
39047 case IX86_BUILTIN_XSAVE64:
39048 case IX86_BUILTIN_XRSTOR64:
39049 case IX86_BUILTIN_XSAVEOPT:
39050 case IX86_BUILTIN_XSAVEOPT64:
39051 case IX86_BUILTIN_XSAVES:
39052 case IX86_BUILTIN_XRSTORS:
39053 case IX86_BUILTIN_XSAVES64:
39054 case IX86_BUILTIN_XRSTORS64:
39055 case IX86_BUILTIN_XSAVEC:
39056 case IX86_BUILTIN_XSAVEC64:
39057 arg0 = CALL_EXPR_ARG (exp, 0);
39058 arg1 = CALL_EXPR_ARG (exp, 1);
39059 op0 = expand_normal (arg0);
39060 op1 = expand_normal (arg1);
39062 if (!address_operand (op0, VOIDmode))
39064 op0 = convert_memory_address (Pmode, op0);
39065 op0 = copy_addr_to_reg (op0);
39067 op0 = gen_rtx_MEM (BLKmode, op0);
39069 op1 = force_reg (DImode, op1);
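/* The instruction takes the 64-bit feature mask in the EDX:EAX register
   pair, so on 64-bit targets split the DImode mask into two SImode
   halves first.  */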
39071 if (TARGET_64BIT)
39073 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
39074 NULL, 1, OPTAB_DIRECT);
39075 switch (fcode)
39077 case IX86_BUILTIN_XSAVE:
39078 icode = CODE_FOR_xsave_rex64;
39079 break;
39080 case IX86_BUILTIN_XRSTOR:
39081 icode = CODE_FOR_xrstor_rex64;
39082 break;
39083 case IX86_BUILTIN_XSAVE64:
39084 icode = CODE_FOR_xsave64;
39085 break;
39086 case IX86_BUILTIN_XRSTOR64:
39087 icode = CODE_FOR_xrstor64;
39088 break;
39089 case IX86_BUILTIN_XSAVEOPT:
39090 icode = CODE_FOR_xsaveopt_rex64;
39091 break;
39092 case IX86_BUILTIN_XSAVEOPT64:
39093 icode = CODE_FOR_xsaveopt64;
39094 break;
39095 case IX86_BUILTIN_XSAVES:
39096 icode = CODE_FOR_xsaves_rex64;
39097 break;
39098 case IX86_BUILTIN_XRSTORS:
39099 icode = CODE_FOR_xrstors_rex64;
39100 break;
39101 case IX86_BUILTIN_XSAVES64:
39102 icode = CODE_FOR_xsaves64;
39103 break;
39104 case IX86_BUILTIN_XRSTORS64:
39105 icode = CODE_FOR_xrstors64;
39106 break;
39107 case IX86_BUILTIN_XSAVEC:
39108 icode = CODE_FOR_xsavec_rex64;
39109 break;
39110 case IX86_BUILTIN_XSAVEC64:
39111 icode = CODE_FOR_xsavec64;
39112 break;
39113 default:
39114 gcc_unreachable ();
39117 op2 = gen_lowpart (SImode, op2);
39118 op1 = gen_lowpart (SImode, op1);
39119 pat = GEN_FCN (icode) (op0, op1, op2);
39121 else
39123 switch (fcode)
39125 case IX86_BUILTIN_XSAVE:
39126 icode = CODE_FOR_xsave;
39127 break;
39128 case IX86_BUILTIN_XRSTOR:
39129 icode = CODE_FOR_xrstor;
39130 break;
39131 case IX86_BUILTIN_XSAVEOPT:
39132 icode = CODE_FOR_xsaveopt;
39133 break;
39134 case IX86_BUILTIN_XSAVES:
39135 icode = CODE_FOR_xsaves;
39136 break;
39137 case IX86_BUILTIN_XRSTORS:
39138 icode = CODE_FOR_xrstors;
39139 break;
39140 case IX86_BUILTIN_XSAVEC:
39141 icode = CODE_FOR_xsavec;
39142 break;
39143 default:
39144 gcc_unreachable ();
39146 pat = GEN_FCN (icode) (op0, op1);
39149 if (pat)
39150 emit_insn (pat);
39151 return 0;
39153 case IX86_BUILTIN_LLWPCB:
39154 arg0 = CALL_EXPR_ARG (exp, 0);
39155 op0 = expand_normal (arg0);
39156 icode = CODE_FOR_lwp_llwpcb;
39157 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39158 op0 = ix86_zero_extend_to_Pmode (op0);
39159 emit_insn (gen_lwp_llwpcb (op0));
39160 return 0;
39162 case IX86_BUILTIN_SLWPCB:
39163 icode = CODE_FOR_lwp_slwpcb;
39164 if (!target
39165 || !insn_data[icode].operand[0].predicate (target, Pmode))
39166 target = gen_reg_rtx (Pmode);
39167 emit_insn (gen_lwp_slwpcb (target));
39168 return target;
39170 case IX86_BUILTIN_BEXTRI32:
39171 case IX86_BUILTIN_BEXTRI64:
39172 arg0 = CALL_EXPR_ARG (exp, 0);
39173 arg1 = CALL_EXPR_ARG (exp, 1);
39174 op0 = expand_normal (arg0);
39175 op1 = expand_normal (arg1);
39176 icode = (fcode == IX86_BUILTIN_BEXTRI32
39177 ? CODE_FOR_tbm_bextri_si
39178 : CODE_FOR_tbm_bextri_di);
39179 if (!CONST_INT_P (op1))
39181 error ("last argument must be an immediate");
39182 return const0_rtx;
39184 else
39186 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39187 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39188 op1 = GEN_INT (length);
39189 op2 = GEN_INT (lsb_index);
39190 pat = GEN_FCN (icode) (target, op0, op1, op2);
39191 if (pat)
39192 emit_insn (pat);
39193 return target;
39196 case IX86_BUILTIN_RDRAND16_STEP:
39197 icode = CODE_FOR_rdrandhi_1;
39198 mode0 = HImode;
39199 goto rdrand_step;
39201 case IX86_BUILTIN_RDRAND32_STEP:
39202 icode = CODE_FOR_rdrandsi_1;
39203 mode0 = SImode;
39204 goto rdrand_step;
39206 case IX86_BUILTIN_RDRAND64_STEP:
39207 icode = CODE_FOR_rdranddi_1;
39208 mode0 = DImode;
39210 rdrand_step:
39211 op0 = gen_reg_rtx (mode0);
39212 emit_insn (GEN_FCN (icode) (op0));
39214 arg0 = CALL_EXPR_ARG (exp, 0);
39215 op1 = expand_normal (arg0);
39216 if (!address_operand (op1, VOIDmode))
39218 op1 = convert_memory_address (Pmode, op1);
39219 op1 = copy_addr_to_reg (op1);
39221 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
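/* RDRAND sets the carry flag on success and clears the destination on
   failure, so the conditional move below makes the builtin return 1 on
   success and 0 otherwise.  */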
39223 op1 = gen_reg_rtx (SImode);
39224 emit_move_insn (op1, CONST1_RTX (SImode));
39226 /* Emit SImode conditional move. */
39227 if (mode0 == HImode)
39229 op2 = gen_reg_rtx (SImode);
39230 emit_insn (gen_zero_extendhisi2 (op2, op0));
39232 else if (mode0 == SImode)
39233 op2 = op0;
39234 else
39235 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39237 if (target == 0
39238 || !register_operand (target, SImode))
39239 target = gen_reg_rtx (SImode);
39241 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39242 const0_rtx);
39243 emit_insn (gen_rtx_SET (VOIDmode, target,
39244 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39245 return target;
39247 case IX86_BUILTIN_RDSEED16_STEP:
39248 icode = CODE_FOR_rdseedhi_1;
39249 mode0 = HImode;
39250 goto rdseed_step;
39252 case IX86_BUILTIN_RDSEED32_STEP:
39253 icode = CODE_FOR_rdseedsi_1;
39254 mode0 = SImode;
39255 goto rdseed_step;
39257 case IX86_BUILTIN_RDSEED64_STEP:
39258 icode = CODE_FOR_rdseeddi_1;
39259 mode0 = DImode;
39261 rdseed_step:
39262 op0 = gen_reg_rtx (mode0);
39263 emit_insn (GEN_FCN (icode) (op0));
39265 arg0 = CALL_EXPR_ARG (exp, 0);
39266 op1 = expand_normal (arg0);
39267 if (!address_operand (op1, VOIDmode))
39269 op1 = convert_memory_address (Pmode, op1);
39270 op1 = copy_addr_to_reg (op1);
39272 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
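/* The carry flag indicates success; copy it into a QImode register and
   zero-extend it to form the builtin's return value.  */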
39274 op2 = gen_reg_rtx (QImode);
39276 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39277 const0_rtx);
39278 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
39280 if (target == 0
39281 || !register_operand (target, SImode))
39282 target = gen_reg_rtx (SImode);
39284 emit_insn (gen_zero_extendqisi2 (target, op2));
39285 return target;
39287 case IX86_BUILTIN_SBB32:
39288 icode = CODE_FOR_subsi3_carry;
39289 mode0 = SImode;
39290 goto addcarryx;
39292 case IX86_BUILTIN_SBB64:
39293 icode = CODE_FOR_subdi3_carry;
39294 mode0 = DImode;
39295 goto addcarryx;
39297 case IX86_BUILTIN_ADDCARRYX32:
39298 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39299 mode0 = SImode;
39300 goto addcarryx;
39302 case IX86_BUILTIN_ADDCARRYX64:
39303 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39304 mode0 = DImode;
39306 addcarryx:
39307 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39308 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39309 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39310 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39312 op0 = gen_reg_rtx (QImode);
39314 /* Generate CF from input operand. */
39315 op1 = expand_normal (arg0);
39316 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
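/* Adding 0xff to the QImode carry-in sets the hardware carry flag
   exactly when the carry-in is nonzero.  */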
39317 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
39319 /* Generate the carry-propagating add/sub instruction to compute X+Y+CF (X-Y-CF for the SBB builtins). */
39320 op2 = expand_normal (arg1);
39321 op3 = expand_normal (arg2);
39323 if (!REG_P (op2))
39324 op2 = copy_to_mode_reg (mode0, op2);
39325 if (!REG_P (op3))
39326 op3 = copy_to_mode_reg (mode0, op3);
39328 op0 = gen_reg_rtx (mode0);
39330 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39331 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39332 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39334 /* Store the result. */
39335 op4 = expand_normal (arg3);
39336 if (!address_operand (op4, VOIDmode))
39338 op4 = convert_memory_address (Pmode, op4);
39339 op4 = copy_addr_to_reg (op4);
39341 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39343 /* Return current CF value. */
39344 if (target == 0)
39345 target = gen_reg_rtx (QImode);
39347 PUT_MODE (pat, QImode);
39348 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
39349 return target;
39351 case IX86_BUILTIN_READ_FLAGS:
39352 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39354 if (optimize
39355 || target == NULL_RTX
39356 || !nonimmediate_operand (target, word_mode)
39357 || GET_MODE (target) != word_mode)
39358 target = gen_reg_rtx (word_mode);
39360 emit_insn (gen_pop (target));
39361 return target;
39363 case IX86_BUILTIN_WRITE_FLAGS:
39365 arg0 = CALL_EXPR_ARG (exp, 0);
39366 op0 = expand_normal (arg0);
39367 if (!general_no_elim_operand (op0, word_mode))
39368 op0 = copy_to_mode_reg (word_mode, op0);
39370 emit_insn (gen_push (op0));
39371 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39372 return 0;
39374 case IX86_BUILTIN_KORTESTC16:
39375 icode = CODE_FOR_kortestchi;
39376 mode0 = HImode;
39377 mode1 = CCCmode;
39378 goto kortest;
39380 case IX86_BUILTIN_KORTESTZ16:
39381 icode = CODE_FOR_kortestzhi;
39382 mode0 = HImode;
39383 mode1 = CCZmode;
39385 kortest:
39386 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39387 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39388 op0 = expand_normal (arg0);
39389 op1 = expand_normal (arg1);
39391 op0 = copy_to_reg (op0);
39392 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39393 op1 = copy_to_reg (op1);
39394 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39396 target = gen_reg_rtx (QImode);
39397 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
39399 /* Emit kortest. */
39400 emit_insn (GEN_FCN (icode) (op0, op1));
39401 /* And use setcc to return result from flags. */
39402 ix86_expand_setcc (target, EQ,
39403 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39404 return target;
39406 case IX86_BUILTIN_GATHERSIV2DF:
39407 icode = CODE_FOR_avx2_gathersiv2df;
39408 goto gather_gen;
39409 case IX86_BUILTIN_GATHERSIV4DF:
39410 icode = CODE_FOR_avx2_gathersiv4df;
39411 goto gather_gen;
39412 case IX86_BUILTIN_GATHERDIV2DF:
39413 icode = CODE_FOR_avx2_gatherdiv2df;
39414 goto gather_gen;
39415 case IX86_BUILTIN_GATHERDIV4DF:
39416 icode = CODE_FOR_avx2_gatherdiv4df;
39417 goto gather_gen;
39418 case IX86_BUILTIN_GATHERSIV4SF:
39419 icode = CODE_FOR_avx2_gathersiv4sf;
39420 goto gather_gen;
39421 case IX86_BUILTIN_GATHERSIV8SF:
39422 icode = CODE_FOR_avx2_gathersiv8sf;
39423 goto gather_gen;
39424 case IX86_BUILTIN_GATHERDIV4SF:
39425 icode = CODE_FOR_avx2_gatherdiv4sf;
39426 goto gather_gen;
39427 case IX86_BUILTIN_GATHERDIV8SF:
39428 icode = CODE_FOR_avx2_gatherdiv8sf;
39429 goto gather_gen;
39430 case IX86_BUILTIN_GATHERSIV2DI:
39431 icode = CODE_FOR_avx2_gathersiv2di;
39432 goto gather_gen;
39433 case IX86_BUILTIN_GATHERSIV4DI:
39434 icode = CODE_FOR_avx2_gathersiv4di;
39435 goto gather_gen;
39436 case IX86_BUILTIN_GATHERDIV2DI:
39437 icode = CODE_FOR_avx2_gatherdiv2di;
39438 goto gather_gen;
39439 case IX86_BUILTIN_GATHERDIV4DI:
39440 icode = CODE_FOR_avx2_gatherdiv4di;
39441 goto gather_gen;
39442 case IX86_BUILTIN_GATHERSIV4SI:
39443 icode = CODE_FOR_avx2_gathersiv4si;
39444 goto gather_gen;
39445 case IX86_BUILTIN_GATHERSIV8SI:
39446 icode = CODE_FOR_avx2_gathersiv8si;
39447 goto gather_gen;
39448 case IX86_BUILTIN_GATHERDIV4SI:
39449 icode = CODE_FOR_avx2_gatherdiv4si;
39450 goto gather_gen;
39451 case IX86_BUILTIN_GATHERDIV8SI:
39452 icode = CODE_FOR_avx2_gatherdiv8si;
39453 goto gather_gen;
39454 case IX86_BUILTIN_GATHERALTSIV4DF:
39455 icode = CODE_FOR_avx2_gathersiv4df;
39456 goto gather_gen;
39457 case IX86_BUILTIN_GATHERALTDIV8SF:
39458 icode = CODE_FOR_avx2_gatherdiv8sf;
39459 goto gather_gen;
39460 case IX86_BUILTIN_GATHERALTSIV4DI:
39461 icode = CODE_FOR_avx2_gathersiv4di;
39462 goto gather_gen;
39463 case IX86_BUILTIN_GATHERALTDIV8SI:
39464 icode = CODE_FOR_avx2_gatherdiv8si;
39465 goto gather_gen;
39466 case IX86_BUILTIN_GATHER3SIV16SF:
39467 icode = CODE_FOR_avx512f_gathersiv16sf;
39468 goto gather_gen;
39469 case IX86_BUILTIN_GATHER3SIV8DF:
39470 icode = CODE_FOR_avx512f_gathersiv8df;
39471 goto gather_gen;
39472 case IX86_BUILTIN_GATHER3DIV16SF:
39473 icode = CODE_FOR_avx512f_gatherdiv16sf;
39474 goto gather_gen;
39475 case IX86_BUILTIN_GATHER3DIV8DF:
39476 icode = CODE_FOR_avx512f_gatherdiv8df;
39477 goto gather_gen;
39478 case IX86_BUILTIN_GATHER3SIV16SI:
39479 icode = CODE_FOR_avx512f_gathersiv16si;
39480 goto gather_gen;
39481 case IX86_BUILTIN_GATHER3SIV8DI:
39482 icode = CODE_FOR_avx512f_gathersiv8di;
39483 goto gather_gen;
39484 case IX86_BUILTIN_GATHER3DIV16SI:
39485 icode = CODE_FOR_avx512f_gatherdiv16si;
39486 goto gather_gen;
39487 case IX86_BUILTIN_GATHER3DIV8DI:
39488 icode = CODE_FOR_avx512f_gatherdiv8di;
39489 goto gather_gen;
39490 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39491 icode = CODE_FOR_avx512f_gathersiv8df;
39492 goto gather_gen;
39493 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39494 icode = CODE_FOR_avx512f_gatherdiv16sf;
39495 goto gather_gen;
39496 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39497 icode = CODE_FOR_avx512f_gathersiv8di;
39498 goto gather_gen;
39499 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39500 icode = CODE_FOR_avx512f_gatherdiv16si;
39501 goto gather_gen;
39502 case IX86_BUILTIN_GATHER3SIV2DF:
39503 icode = CODE_FOR_avx512vl_gathersiv2df;
39504 goto gather_gen;
39505 case IX86_BUILTIN_GATHER3SIV4DF:
39506 icode = CODE_FOR_avx512vl_gathersiv4df;
39507 goto gather_gen;
39508 case IX86_BUILTIN_GATHER3DIV2DF:
39509 icode = CODE_FOR_avx512vl_gatherdiv2df;
39510 goto gather_gen;
39511 case IX86_BUILTIN_GATHER3DIV4DF:
39512 icode = CODE_FOR_avx512vl_gatherdiv4df;
39513 goto gather_gen;
39514 case IX86_BUILTIN_GATHER3SIV4SF:
39515 icode = CODE_FOR_avx512vl_gathersiv4sf;
39516 goto gather_gen;
39517 case IX86_BUILTIN_GATHER3SIV8SF:
39518 icode = CODE_FOR_avx512vl_gathersiv8sf;
39519 goto gather_gen;
39520 case IX86_BUILTIN_GATHER3DIV4SF:
39521 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39522 goto gather_gen;
39523 case IX86_BUILTIN_GATHER3DIV8SF:
39524 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39525 goto gather_gen;
39526 case IX86_BUILTIN_GATHER3SIV2DI:
39527 icode = CODE_FOR_avx512vl_gathersiv2di;
39528 goto gather_gen;
39529 case IX86_BUILTIN_GATHER3SIV4DI:
39530 icode = CODE_FOR_avx512vl_gathersiv4di;
39531 goto gather_gen;
39532 case IX86_BUILTIN_GATHER3DIV2DI:
39533 icode = CODE_FOR_avx512vl_gatherdiv2di;
39534 goto gather_gen;
39535 case IX86_BUILTIN_GATHER3DIV4DI:
39536 icode = CODE_FOR_avx512vl_gatherdiv4di;
39537 goto gather_gen;
39538 case IX86_BUILTIN_GATHER3SIV4SI:
39539 icode = CODE_FOR_avx512vl_gathersiv4si;
39540 goto gather_gen;
39541 case IX86_BUILTIN_GATHER3SIV8SI:
39542 icode = CODE_FOR_avx512vl_gathersiv8si;
39543 goto gather_gen;
39544 case IX86_BUILTIN_GATHER3DIV4SI:
39545 icode = CODE_FOR_avx512vl_gatherdiv4si;
39546 goto gather_gen;
39547 case IX86_BUILTIN_GATHER3DIV8SI:
39548 icode = CODE_FOR_avx512vl_gatherdiv8si;
39549 goto gather_gen;
39550 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39551 icode = CODE_FOR_avx512vl_gathersiv4df;
39552 goto gather_gen;
39553 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39554 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39555 goto gather_gen;
39556 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39557 icode = CODE_FOR_avx512vl_gathersiv4di;
39558 goto gather_gen;
39559 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39560 icode = CODE_FOR_avx512vl_gatherdiv8si;
39561 goto gather_gen;
39562 case IX86_BUILTIN_SCATTERSIV16SF:
39563 icode = CODE_FOR_avx512f_scattersiv16sf;
39564 goto scatter_gen;
39565 case IX86_BUILTIN_SCATTERSIV8DF:
39566 icode = CODE_FOR_avx512f_scattersiv8df;
39567 goto scatter_gen;
39568 case IX86_BUILTIN_SCATTERDIV16SF:
39569 icode = CODE_FOR_avx512f_scatterdiv16sf;
39570 goto scatter_gen;
39571 case IX86_BUILTIN_SCATTERDIV8DF:
39572 icode = CODE_FOR_avx512f_scatterdiv8df;
39573 goto scatter_gen;
39574 case IX86_BUILTIN_SCATTERSIV16SI:
39575 icode = CODE_FOR_avx512f_scattersiv16si;
39576 goto scatter_gen;
39577 case IX86_BUILTIN_SCATTERSIV8DI:
39578 icode = CODE_FOR_avx512f_scattersiv8di;
39579 goto scatter_gen;
39580 case IX86_BUILTIN_SCATTERDIV16SI:
39581 icode = CODE_FOR_avx512f_scatterdiv16si;
39582 goto scatter_gen;
39583 case IX86_BUILTIN_SCATTERDIV8DI:
39584 icode = CODE_FOR_avx512f_scatterdiv8di;
39585 goto scatter_gen;
39586 case IX86_BUILTIN_SCATTERSIV8SF:
39587 icode = CODE_FOR_avx512vl_scattersiv8sf;
39588 goto scatter_gen;
39589 case IX86_BUILTIN_SCATTERSIV4SF:
39590 icode = CODE_FOR_avx512vl_scattersiv4sf;
39591 goto scatter_gen;
39592 case IX86_BUILTIN_SCATTERSIV4DF:
39593 icode = CODE_FOR_avx512vl_scattersiv4df;
39594 goto scatter_gen;
39595 case IX86_BUILTIN_SCATTERSIV2DF:
39596 icode = CODE_FOR_avx512vl_scattersiv2df;
39597 goto scatter_gen;
39598 case IX86_BUILTIN_SCATTERDIV8SF:
39599 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39600 goto scatter_gen;
39601 case IX86_BUILTIN_SCATTERDIV4SF:
39602 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39603 goto scatter_gen;
39604 case IX86_BUILTIN_SCATTERDIV4DF:
39605 icode = CODE_FOR_avx512vl_scatterdiv4df;
39606 goto scatter_gen;
39607 case IX86_BUILTIN_SCATTERDIV2DF:
39608 icode = CODE_FOR_avx512vl_scatterdiv2df;
39609 goto scatter_gen;
39610 case IX86_BUILTIN_SCATTERSIV8SI:
39611 icode = CODE_FOR_avx512vl_scattersiv8si;
39612 goto scatter_gen;
39613 case IX86_BUILTIN_SCATTERSIV4SI:
39614 icode = CODE_FOR_avx512vl_scattersiv4si;
39615 goto scatter_gen;
39616 case IX86_BUILTIN_SCATTERSIV4DI:
39617 icode = CODE_FOR_avx512vl_scattersiv4di;
39618 goto scatter_gen;
39619 case IX86_BUILTIN_SCATTERSIV2DI:
39620 icode = CODE_FOR_avx512vl_scattersiv2di;
39621 goto scatter_gen;
39622 case IX86_BUILTIN_SCATTERDIV8SI:
39623 icode = CODE_FOR_avx512vl_scatterdiv8si;
39624 goto scatter_gen;
39625 case IX86_BUILTIN_SCATTERDIV4SI:
39626 icode = CODE_FOR_avx512vl_scatterdiv4si;
39627 goto scatter_gen;
39628 case IX86_BUILTIN_SCATTERDIV4DI:
39629 icode = CODE_FOR_avx512vl_scatterdiv4di;
39630 goto scatter_gen;
39631 case IX86_BUILTIN_SCATTERDIV2DI:
39632 icode = CODE_FOR_avx512vl_scatterdiv2di;
39633 goto scatter_gen;
39634 case IX86_BUILTIN_GATHERPFDPD:
39635 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39636 goto vec_prefetch_gen;
39637 case IX86_BUILTIN_GATHERPFDPS:
39638 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39639 goto vec_prefetch_gen;
39640 case IX86_BUILTIN_GATHERPFQPD:
39641 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39642 goto vec_prefetch_gen;
39643 case IX86_BUILTIN_GATHERPFQPS:
39644 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39645 goto vec_prefetch_gen;
39646 case IX86_BUILTIN_SCATTERPFDPD:
39647 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39648 goto vec_prefetch_gen;
39649 case IX86_BUILTIN_SCATTERPFDPS:
39650 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39651 goto vec_prefetch_gen;
39652 case IX86_BUILTIN_SCATTERPFQPD:
39653 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39654 goto vec_prefetch_gen;
39655 case IX86_BUILTIN_SCATTERPFQPS:
39656 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39657 goto vec_prefetch_gen;
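/* Common expansion for the gather builtins. The call arguments are:
   merge source, base pointer, index vector, mask and scale.  */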
39659 gather_gen:
39660 rtx half;
39661 rtx (*gen) (rtx, rtx);
39663 arg0 = CALL_EXPR_ARG (exp, 0);
39664 arg1 = CALL_EXPR_ARG (exp, 1);
39665 arg2 = CALL_EXPR_ARG (exp, 2);
39666 arg3 = CALL_EXPR_ARG (exp, 3);
39667 arg4 = CALL_EXPR_ARG (exp, 4);
39668 op0 = expand_normal (arg0);
39669 op1 = expand_normal (arg1);
39670 op2 = expand_normal (arg2);
39671 op3 = expand_normal (arg3);
39672 op4 = expand_normal (arg4);
39673 /* Note the arg order is different from the operand order. */
39674 mode0 = insn_data[icode].operand[1].mode;
39675 mode2 = insn_data[icode].operand[3].mode;
39676 mode3 = insn_data[icode].operand[4].mode;
39677 mode4 = insn_data[icode].operand[5].mode;
39679 if (target == NULL_RTX
39680 || GET_MODE (target) != insn_data[icode].operand[0].mode
39681 || !insn_data[icode].operand[0].predicate (target,
39682 GET_MODE (target)))
39683 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39684 else
39685 subtarget = target;
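/* The "alt" gather variants pass some operands in a wider vector mode
   than the insn expects; extract the low halves of those operands.  */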
39687 switch (fcode)
39689 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39690 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39691 half = gen_reg_rtx (V8SImode);
39692 if (!nonimmediate_operand (op2, V16SImode))
39693 op2 = copy_to_mode_reg (V16SImode, op2);
39694 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39695 op2 = half;
39696 break;
39697 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39698 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39699 case IX86_BUILTIN_GATHERALTSIV4DF:
39700 case IX86_BUILTIN_GATHERALTSIV4DI:
39701 half = gen_reg_rtx (V4SImode);
39702 if (!nonimmediate_operand (op2, V8SImode))
39703 op2 = copy_to_mode_reg (V8SImode, op2);
39704 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39705 op2 = half;
39706 break;
39707 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39708 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39709 half = gen_reg_rtx (mode0);
39710 if (mode0 == V8SFmode)
39711 gen = gen_vec_extract_lo_v16sf;
39712 else
39713 gen = gen_vec_extract_lo_v16si;
39714 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39715 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39716 emit_insn (gen (half, op0));
39717 op0 = half;
39718 if (GET_MODE (op3) != VOIDmode)
39720 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39721 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39722 emit_insn (gen (half, op3));
39723 op3 = half;
39725 break;
39726 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39727 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39728 case IX86_BUILTIN_GATHERALTDIV8SF:
39729 case IX86_BUILTIN_GATHERALTDIV8SI:
39730 half = gen_reg_rtx (mode0);
39731 if (mode0 == V4SFmode)
39732 gen = gen_vec_extract_lo_v8sf;
39733 else
39734 gen = gen_vec_extract_lo_v8si;
39735 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39736 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39737 emit_insn (gen (half, op0));
39738 op0 = half;
39739 if (GET_MODE (op3) != VOIDmode)
39741 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39742 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39743 emit_insn (gen (half, op3));
39744 op3 = half;
39746 break;
39747 default:
39748 break;
39751 /* Force the memory operand to be addressed through a base register
39752 here. We don't want to do that for the memory operands of other
39753 builtin functions. */
39754 op1 = ix86_zero_extend_to_Pmode (op1);
39756 if (!insn_data[icode].operand[1].predicate (op0, mode0))
39757 op0 = copy_to_mode_reg (mode0, op0);
39758 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
39759 op1 = copy_to_mode_reg (Pmode, op1);
39760 if (!insn_data[icode].operand[3].predicate (op2, mode2))
39761 op2 = copy_to_mode_reg (mode2, op2);
39763 op3 = fixup_modeless_constant (op3, mode3);
39765 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
39767 if (!insn_data[icode].operand[4].predicate (op3, mode3))
39768 op3 = copy_to_mode_reg (mode3, op3);
39770 else
39772 op3 = copy_to_reg (op3);
39773 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
39775 if (!insn_data[icode].operand[5].predicate (op4, mode4))
39777 error ("the last argument must be scale 1, 2, 4, 8");
39778 return const0_rtx;
39781 /* Optimize. If mask is known to have all high bits set,
39782 replace op0 with pc_rtx to signal that the instruction
39783 overwrites the whole destination and doesn't use its
39784 previous contents. */
39785 if (optimize)
39787 if (TREE_CODE (arg3) == INTEGER_CST)
39789 if (integer_all_onesp (arg3))
39790 op0 = pc_rtx;
39792 else if (TREE_CODE (arg3) == VECTOR_CST)
39794 unsigned int negative = 0;
39795 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
39797 tree cst = VECTOR_CST_ELT (arg3, i);
39798 if (TREE_CODE (cst) == INTEGER_CST
39799 && tree_int_cst_sign_bit (cst))
39800 negative++;
39801 else if (TREE_CODE (cst) == REAL_CST
39802 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
39803 negative++;
39805 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
39806 op0 = pc_rtx;
39808 else if (TREE_CODE (arg3) == SSA_NAME
39809 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
39811 /* Recognize also when mask is like:
39812 __v2df src = _mm_setzero_pd ();
39813 __v2df mask = _mm_cmpeq_pd (src, src);
39815 __v8sf src = _mm256_setzero_ps ();
39816 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
39817 as that is a cheaper way to load all ones into
39818 a register than having to load a constant from
39819 memory. */
39820 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
39821 if (is_gimple_call (def_stmt))
39823 tree fndecl = gimple_call_fndecl (def_stmt);
39824 if (fndecl
39825 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
39826 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
39828 case IX86_BUILTIN_CMPPD:
39829 case IX86_BUILTIN_CMPPS:
39830 case IX86_BUILTIN_CMPPD256:
39831 case IX86_BUILTIN_CMPPS256:
39832 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
39833 break;
39834 /* FALLTHRU */
39835 case IX86_BUILTIN_CMPEQPD:
39836 case IX86_BUILTIN_CMPEQPS:
39837 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
39838 && initializer_zerop (gimple_call_arg (def_stmt,
39839 1)))
39840 op0 = pc_rtx;
39841 break;
39842 default:
39843 break;
39849 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
39850 if (! pat)
39851 return const0_rtx;
39852 emit_insn (pat);
39854 switch (fcode)
39856 case IX86_BUILTIN_GATHER3DIV16SF:
39857 if (target == NULL_RTX)
39858 target = gen_reg_rtx (V8SFmode);
39859 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
39860 break;
39861 case IX86_BUILTIN_GATHER3DIV16SI:
39862 if (target == NULL_RTX)
39863 target = gen_reg_rtx (V8SImode);
39864 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
39865 break;
39866 case IX86_BUILTIN_GATHER3DIV8SF:
39867 case IX86_BUILTIN_GATHERDIV8SF:
39868 if (target == NULL_RTX)
39869 target = gen_reg_rtx (V4SFmode);
39870 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
39871 break;
39872 case IX86_BUILTIN_GATHER3DIV8SI:
39873 case IX86_BUILTIN_GATHERDIV8SI:
39874 if (target == NULL_RTX)
39875 target = gen_reg_rtx (V4SImode);
39876 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
39877 break;
39878 default:
39879 target = subtarget;
39880 break;
39882 return target;
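/* Common expansion for the scatter builtins. The call arguments are:
   base pointer, mask, index vector, source vector and scale.  */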
39884 scatter_gen:
39885 arg0 = CALL_EXPR_ARG (exp, 0);
39886 arg1 = CALL_EXPR_ARG (exp, 1);
39887 arg2 = CALL_EXPR_ARG (exp, 2);
39888 arg3 = CALL_EXPR_ARG (exp, 3);
39889 arg4 = CALL_EXPR_ARG (exp, 4);
39890 op0 = expand_normal (arg0);
39891 op1 = expand_normal (arg1);
39892 op2 = expand_normal (arg2);
39893 op3 = expand_normal (arg3);
39894 op4 = expand_normal (arg4);
39895 mode1 = insn_data[icode].operand[1].mode;
39896 mode2 = insn_data[icode].operand[2].mode;
39897 mode3 = insn_data[icode].operand[3].mode;
39898 mode4 = insn_data[icode].operand[4].mode;
39900 /* Force the memory operand to be addressed through a base register
39901 here. We don't want to do that for the memory operands of other
39902 builtin functions. */
39903 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
39905 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39906 op0 = copy_to_mode_reg (Pmode, op0);
39908 op1 = fixup_modeless_constant (op1, mode1);
39910 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
39912 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39913 op1 = copy_to_mode_reg (mode1, op1);
39915 else
39917 op1 = copy_to_reg (op1);
39918 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
39921 if (!insn_data[icode].operand[2].predicate (op2, mode2))
39922 op2 = copy_to_mode_reg (mode2, op2);
39924 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39925 op3 = copy_to_mode_reg (mode3, op3);
39927 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39929 error ("the last argument must be scale 1, 2, 4, 8");
39930 return const0_rtx;
39933 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39934 if (! pat)
39935 return const0_rtx;
39937 emit_insn (pat);
39938 return 0;
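/* Common expansion for the gather/scatter prefetch builtins. The call
   arguments are: mask, index vector, base pointer, scale and hint.  */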
39940 vec_prefetch_gen:
39941 arg0 = CALL_EXPR_ARG (exp, 0);
39942 arg1 = CALL_EXPR_ARG (exp, 1);
39943 arg2 = CALL_EXPR_ARG (exp, 2);
39944 arg3 = CALL_EXPR_ARG (exp, 3);
39945 arg4 = CALL_EXPR_ARG (exp, 4);
39946 op0 = expand_normal (arg0);
39947 op1 = expand_normal (arg1);
39948 op2 = expand_normal (arg2);
39949 op3 = expand_normal (arg3);
39950 op4 = expand_normal (arg4);
39951 mode0 = insn_data[icode].operand[0].mode;
39952 mode1 = insn_data[icode].operand[1].mode;
39953 mode3 = insn_data[icode].operand[3].mode;
39954 mode4 = insn_data[icode].operand[4].mode;
39956 op0 = fixup_modeless_constant (op0, mode0);
39958 if (GET_MODE (op0) == mode0
39959 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
39961 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39962 op0 = copy_to_mode_reg (mode0, op0);
39964 else if (op0 != constm1_rtx)
39966 op0 = copy_to_reg (op0);
39967 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39970 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39971 op1 = copy_to_mode_reg (mode1, op1);
39973 /* Force the memory operand to be addressed through a base register
39974 here. We don't want to do that for the memory operands of other
39975 builtin functions. */
39976 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
39978 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
39979 op2 = copy_to_mode_reg (Pmode, op2);
39981 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39983 error ("the forth argument must be scale 1, 2, 4, 8");
39984 return const0_rtx;
39987 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39989 error ("incorrect hint operand");
39990 return const0_rtx;
39993 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39994 if (! pat)
39995 return const0_rtx;
39997 emit_insn (pat);
39999 return 0;
40001 case IX86_BUILTIN_XABORT:
40002 icode = CODE_FOR_xabort;
40003 arg0 = CALL_EXPR_ARG (exp, 0);
40004 op0 = expand_normal (arg0);
40005 mode0 = insn_data[icode].operand[0].mode;
40006 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40008 error ("the xabort's argument must be an 8-bit immediate");
40009 return const0_rtx;
40011 emit_insn (gen_xabort (op0));
40012 return 0;
40014 default:
40015 break;
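/* Not a specially handled builtin; look the code up in the descriptor
   tables and dispatch to the matching generic expander.  */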
40018 for (i = 0, d = bdesc_special_args;
40019 i < ARRAY_SIZE (bdesc_special_args);
40020 i++, d++)
40021 if (d->code == fcode)
40022 return ix86_expand_special_args_builtin (d, exp, target);
40024 for (i = 0, d = bdesc_args;
40025 i < ARRAY_SIZE (bdesc_args);
40026 i++, d++)
40027 if (d->code == fcode)
40028 switch (fcode)
40030 case IX86_BUILTIN_FABSQ:
40031 case IX86_BUILTIN_COPYSIGNQ:
40032 if (!TARGET_SSE)
40033 /* Emit a normal call if SSE isn't available. */
40034 return expand_call (exp, target, ignore);
40035 default:
40036 return ix86_expand_args_builtin (d, exp, target);
40039 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
40040 if (d->code == fcode)
40041 return ix86_expand_sse_comi (d, exp, target);
40043 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
40044 if (d->code == fcode)
40045 return ix86_expand_round_builtin (d, exp, target);
40047 for (i = 0, d = bdesc_pcmpestr;
40048 i < ARRAY_SIZE (bdesc_pcmpestr);
40049 i++, d++)
40050 if (d->code == fcode)
40051 return ix86_expand_sse_pcmpestr (d, exp, target);
40053 for (i = 0, d = bdesc_pcmpistr;
40054 i < ARRAY_SIZE (bdesc_pcmpistr);
40055 i++, d++)
40056 if (d->code == fcode)
40057 return ix86_expand_sse_pcmpistr (d, exp, target);
40059 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
40060 if (d->code == fcode)
40061 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
40062 (enum ix86_builtin_func_type)
40063 d->flag, d->comparison);
40065 gcc_unreachable ();
40068 /* This returns the target-specific builtin with code CODE if
40069 current_function_decl has visibility on this builtin, which is checked
40070 using isa flags. Returns NULL_TREE otherwise. */
40072 static tree ix86_get_builtin (enum ix86_builtins code)
40074 struct cl_target_option *opts;
40075 tree target_tree = NULL_TREE;
40077 /* Determine the isa flags of current_function_decl. */
40079 if (current_function_decl)
40080 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
40082 if (target_tree == NULL)
40083 target_tree = target_option_default_node;
40085 opts = TREE_TARGET_OPTION (target_tree);
40087 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40088 return ix86_builtin_decl (code, true);
40089 else
40090 return NULL_TREE;
40093 /* Return the function decl for the target-specific builtin
40094 corresponding to the MPX builtin passed in FCODE. */
40095 static tree
40096 ix86_builtin_mpx_function (unsigned fcode)
40098 switch (fcode)
40100 case BUILT_IN_CHKP_BNDMK:
40101 return ix86_builtins[IX86_BUILTIN_BNDMK];
40103 case BUILT_IN_CHKP_BNDSTX:
40104 return ix86_builtins[IX86_BUILTIN_BNDSTX];
40106 case BUILT_IN_CHKP_BNDLDX:
40107 return ix86_builtins[IX86_BUILTIN_BNDLDX];
40109 case BUILT_IN_CHKP_BNDCL:
40110 return ix86_builtins[IX86_BUILTIN_BNDCL];
40112 case BUILT_IN_CHKP_BNDCU:
40113 return ix86_builtins[IX86_BUILTIN_BNDCU];
40115 case BUILT_IN_CHKP_BNDRET:
40116 return ix86_builtins[IX86_BUILTIN_BNDRET];
40118 case BUILT_IN_CHKP_INTERSECT:
40119 return ix86_builtins[IX86_BUILTIN_BNDINT];
40121 case BUILT_IN_CHKP_NARROW:
40122 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40124 case BUILT_IN_CHKP_SIZEOF:
40125 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40127 case BUILT_IN_CHKP_EXTRACT_LOWER:
40128 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40130 case BUILT_IN_CHKP_EXTRACT_UPPER:
40131 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40133 default:
40134 return NULL_TREE;
40137 gcc_unreachable ();
40140 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40142 Return an address to be used to load/store bounds for pointer
40143 passed in SLOT.
40145 SLOT_NO is an integer constant holding number of a target
40146 dependent special slot to be used in case SLOT is not a memory.
40148 SPECIAL_BASE is a pointer to be used as a base of fake address
40149 to access special slots in Bounds Table. SPECIAL_BASE[-1],
40150 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
40152 static rtx
40153 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40155 rtx addr = NULL;
40157 /* A NULL SLOT means we pass bounds for a pointer that is not passed
40158 to the function at all. A register SLOT means we pass the pointer
40159 in a register. In both cases bounds are passed via the Bounds
40160 Table. Since we do not have an actual pointer stored in memory,
40161 we have to use fake addresses to access the Bounds Table. We
40162 start with (special_base - sizeof (void*)) and decrease this
40163 address by pointer size to get addresses for other slots. */
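/* For instance, with 64-bit pointers (GET_MODE_SIZE (Pmode) == 8) the fake
   address for special slot 0 is special_base - 8, for slot 1 it is
   special_base - 16, for slot 2 special_base - 24, and so on. */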
40164 if (!slot || REG_P (slot))
40166 gcc_assert (CONST_INT_P (slot_no));
40167 addr = plus_constant (Pmode, special_base,
40168 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40170 /* If the pointer is passed in memory then its address is used to
40171 access the Bounds Table. */
40172 else if (MEM_P (slot))
40174 addr = XEXP (slot, 0);
40175 if (!register_operand (addr, Pmode))
40176 addr = copy_addr_to_reg (addr);
40178 else
40179 gcc_unreachable ();
40181 return addr;
40184 /* Expand pass uses this hook to load bounds for function parameter
40185 PTR passed in SLOT in case its bounds are not passed in a register.
40187 If SLOT is a memory, then bounds are loaded as for regular pointer
40188 loaded from memory. PTR may be NULL in case SLOT is a memory.
40189 In such case value of PTR (if required) may be loaded from SLOT.
40191 If SLOT is NULL or a register then SLOT_NO is an integer constant
40192 holding number of the target dependent special slot which should be
40193 used to obtain bounds.
40195 Return loaded bounds. */
40197 static rtx
40198 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40200 rtx reg = gen_reg_rtx (BNDmode);
40201 rtx addr;
40203 /* Get address to be used to access Bounds Table. Special slots start
40204 at the location of return address of the current function. */
40205 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40207 /* Load the pointer value from memory if we don't have it. */
40208 if (!ptr)
40210 gcc_assert (MEM_P (slot));
40211 ptr = copy_addr_to_reg (slot);
40214 emit_insn (BNDmode == BND64mode
40215 ? gen_bnd64_ldx (reg, addr, ptr)
40216 : gen_bnd32_ldx (reg, addr, ptr));
40218 return reg;
40221 /* Expand pass uses this hook to store BOUNDS for call argument PTR
40222 passed in SLOT in case BOUNDS are not passed in a register.
40224 If SLOT is a memory, then BOUNDS are stored as for regular pointer
40225 stored in memory. PTR may be NULL in case SLOT is a memory.
40226 In such case value of PTR (if required) may be loaded from SLOT.
40228 If SLOT is NULL or a register then SLOT_NO is an integer constant
40229 holding number of the target dependent special slot which should be
40230 used to store BOUNDS. */
40232 static void
40233 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40235 rtx addr;
40237 /* Get address to be used to access Bounds Table. Special slots start
40238 at the location of return address of a called function. */
40239 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40241 /* Load the pointer value from memory if we don't have it. */
40242 if (!ptr)
40244 gcc_assert (MEM_P (slot));
40245 ptr = copy_addr_to_reg (slot);
40248 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40249 if (!register_operand (bounds, BNDmode))
40250 bounds = copy_to_mode_reg (BNDmode, bounds);
40252 emit_insn (BNDmode == BND64mode
40253 ? gen_bnd64_stx (addr, ptr, bounds)
40254 : gen_bnd32_stx (addr, ptr, bounds));
40257 /* Load and return bounds returned by function in SLOT. */
40259 static rtx
40260 ix86_load_returned_bounds (rtx slot)
40262 rtx res;
40264 gcc_assert (REG_P (slot));
40265 res = gen_reg_rtx (BNDmode);
40266 emit_move_insn (res, slot);
40268 return res;
40271 /* Store BOUNDS returned by function into SLOT. */
40273 static void
40274 ix86_store_returned_bounds (rtx slot, rtx bounds)
40276 gcc_assert (REG_P (slot));
40277 emit_move_insn (slot, bounds);
40280 /* Returns a function decl for a vectorized version of the builtin function
40281 with builtin function code FN and the result vector type TYPE, or NULL_TREE
40282 if it is not available. */
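/* For example (illustrative only): a call to the scalar BUILT_IN_SQRT
   builtin vectorized with both TYPE_OUT and TYPE_IN being 2-element double
   vectors (V2DFmode) maps to the decl of IX86_BUILTIN_SQRTPD, provided the
   current isa flags make that builtin visible. */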
40284 static tree
40285 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40286 tree type_in)
40288 machine_mode in_mode, out_mode;
40289 int in_n, out_n;
40290 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40292 if (TREE_CODE (type_out) != VECTOR_TYPE
40293 || TREE_CODE (type_in) != VECTOR_TYPE
40294 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40295 return NULL_TREE;
40297 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40298 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40299 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40300 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40302 switch (fn)
40304 case BUILT_IN_SQRT:
40305 if (out_mode == DFmode && in_mode == DFmode)
40307 if (out_n == 2 && in_n == 2)
40308 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40309 else if (out_n == 4 && in_n == 4)
40310 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40311 else if (out_n == 8 && in_n == 8)
40312 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40314 break;
40316 case BUILT_IN_EXP2F:
40317 if (out_mode == SFmode && in_mode == SFmode)
40319 if (out_n == 16 && in_n == 16)
40320 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40322 break;
40324 case BUILT_IN_SQRTF:
40325 if (out_mode == SFmode && in_mode == SFmode)
40327 if (out_n == 4 && in_n == 4)
40328 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40329 else if (out_n == 8 && in_n == 8)
40330 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40331 else if (out_n == 16 && in_n == 16)
40332 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40334 break;
40336 case BUILT_IN_IFLOOR:
40337 case BUILT_IN_LFLOOR:
40338 case BUILT_IN_LLFLOOR:
40339 /* The round insn does not trap on denormals. */
40340 if (flag_trapping_math || !TARGET_ROUND)
40341 break;
40343 if (out_mode == SImode && in_mode == DFmode)
40345 if (out_n == 4 && in_n == 2)
40346 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40347 else if (out_n == 8 && in_n == 4)
40348 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40349 else if (out_n == 16 && in_n == 8)
40350 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40352 break;
40354 case BUILT_IN_IFLOORF:
40355 case BUILT_IN_LFLOORF:
40356 case BUILT_IN_LLFLOORF:
40357 /* The round insn does not trap on denormals. */
40358 if (flag_trapping_math || !TARGET_ROUND)
40359 break;
40361 if (out_mode == SImode && in_mode == SFmode)
40363 if (out_n == 4 && in_n == 4)
40364 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40365 else if (out_n == 8 && in_n == 8)
40366 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40368 break;
40370 case BUILT_IN_ICEIL:
40371 case BUILT_IN_LCEIL:
40372 case BUILT_IN_LLCEIL:
40373 /* The round insn does not trap on denormals. */
40374 if (flag_trapping_math || !TARGET_ROUND)
40375 break;
40377 if (out_mode == SImode && in_mode == DFmode)
40379 if (out_n == 4 && in_n == 2)
40380 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40381 else if (out_n == 8 && in_n == 4)
40382 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40383 else if (out_n == 16 && in_n == 8)
40384 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40386 break;
40388 case BUILT_IN_ICEILF:
40389 case BUILT_IN_LCEILF:
40390 case BUILT_IN_LLCEILF:
40391 /* The round insn does not trap on denormals. */
40392 if (flag_trapping_math || !TARGET_ROUND)
40393 break;
40395 if (out_mode == SImode && in_mode == SFmode)
40397 if (out_n == 4 && in_n == 4)
40398 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40399 else if (out_n == 8 && in_n == 8)
40400 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40402 break;
40404 case BUILT_IN_IRINT:
40405 case BUILT_IN_LRINT:
40406 case BUILT_IN_LLRINT:
40407 if (out_mode == SImode && in_mode == DFmode)
40409 if (out_n == 4 && in_n == 2)
40410 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40411 else if (out_n == 8 && in_n == 4)
40412 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40414 break;
40416 case BUILT_IN_IRINTF:
40417 case BUILT_IN_LRINTF:
40418 case BUILT_IN_LLRINTF:
40419 if (out_mode == SImode && in_mode == SFmode)
40421 if (out_n == 4 && in_n == 4)
40422 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40423 else if (out_n == 8 && in_n == 8)
40424 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40426 break;
40428 case BUILT_IN_IROUND:
40429 case BUILT_IN_LROUND:
40430 case BUILT_IN_LLROUND:
40431 /* The round insn does not trap on denormals. */
40432 if (flag_trapping_math || !TARGET_ROUND)
40433 break;
40435 if (out_mode == SImode && in_mode == DFmode)
40437 if (out_n == 4 && in_n == 2)
40438 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40439 else if (out_n == 8 && in_n == 4)
40440 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40441 else if (out_n == 16 && in_n == 8)
40442 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40444 break;
40446 case BUILT_IN_IROUNDF:
40447 case BUILT_IN_LROUNDF:
40448 case BUILT_IN_LLROUNDF:
40449 /* The round insn does not trap on denormals. */
40450 if (flag_trapping_math || !TARGET_ROUND)
40451 break;
40453 if (out_mode == SImode && in_mode == SFmode)
40455 if (out_n == 4 && in_n == 4)
40456 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40457 else if (out_n == 8 && in_n == 8)
40458 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40460 break;
40462 case BUILT_IN_COPYSIGN:
40463 if (out_mode == DFmode && in_mode == DFmode)
40465 if (out_n == 2 && in_n == 2)
40466 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40467 else if (out_n == 4 && in_n == 4)
40468 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40469 else if (out_n == 8 && in_n == 8)
40470 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40472 break;
40474 case BUILT_IN_COPYSIGNF:
40475 if (out_mode == SFmode && in_mode == SFmode)
40477 if (out_n == 4 && in_n == 4)
40478 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40479 else if (out_n == 8 && in_n == 8)
40480 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40481 else if (out_n == 16 && in_n == 16)
40482 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40484 break;
40486 case BUILT_IN_FLOOR:
40487 /* The round insn does not trap on denormals. */
40488 if (flag_trapping_math || !TARGET_ROUND)
40489 break;
40491 if (out_mode == DFmode && in_mode == DFmode)
40493 if (out_n == 2 && in_n == 2)
40494 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40495 else if (out_n == 4 && in_n == 4)
40496 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40498 break;
40500 case BUILT_IN_FLOORF:
40501 /* The round insn does not trap on denormals. */
40502 if (flag_trapping_math || !TARGET_ROUND)
40503 break;
40505 if (out_mode == SFmode && in_mode == SFmode)
40507 if (out_n == 4 && in_n == 4)
40508 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40509 else if (out_n == 8 && in_n == 8)
40510 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40512 break;
40514 case BUILT_IN_CEIL:
40515 /* The round insn does not trap on denormals. */
40516 if (flag_trapping_math || !TARGET_ROUND)
40517 break;
40519 if (out_mode == DFmode && in_mode == DFmode)
40521 if (out_n == 2 && in_n == 2)
40522 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40523 else if (out_n == 4 && in_n == 4)
40524 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40526 break;
40528 case BUILT_IN_CEILF:
40529 /* The round insn does not trap on denormals. */
40530 if (flag_trapping_math || !TARGET_ROUND)
40531 break;
40533 if (out_mode == SFmode && in_mode == SFmode)
40535 if (out_n == 4 && in_n == 4)
40536 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40537 else if (out_n == 8 && in_n == 8)
40538 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40540 break;
40542 case BUILT_IN_TRUNC:
40543 /* The round insn does not trap on denormals. */
40544 if (flag_trapping_math || !TARGET_ROUND)
40545 break;
40547 if (out_mode == DFmode && in_mode == DFmode)
40549 if (out_n == 2 && in_n == 2)
40550 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40551 else if (out_n == 4 && in_n == 4)
40552 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40554 break;
40556 case BUILT_IN_TRUNCF:
40557 /* The round insn does not trap on denormals. */
40558 if (flag_trapping_math || !TARGET_ROUND)
40559 break;
40561 if (out_mode == SFmode && in_mode == SFmode)
40563 if (out_n == 4 && in_n == 4)
40564 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40565 else if (out_n == 8 && in_n == 8)
40566 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40568 break;
40570 case BUILT_IN_RINT:
40571 /* The round insn does not trap on denormals. */
40572 if (flag_trapping_math || !TARGET_ROUND)
40573 break;
40575 if (out_mode == DFmode && in_mode == DFmode)
40577 if (out_n == 2 && in_n == 2)
40578 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40579 else if (out_n == 4 && in_n == 4)
40580 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40582 break;
40584 case BUILT_IN_RINTF:
40585 /* The round insn does not trap on denormals. */
40586 if (flag_trapping_math || !TARGET_ROUND)
40587 break;
40589 if (out_mode == SFmode && in_mode == SFmode)
40591 if (out_n == 4 && in_n == 4)
40592 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40593 else if (out_n == 8 && in_n == 8)
40594 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40596 break;
40598 case BUILT_IN_ROUND:
40599 /* The round insn does not trap on denormals. */
40600 if (flag_trapping_math || !TARGET_ROUND)
40601 break;
40603 if (out_mode == DFmode && in_mode == DFmode)
40605 if (out_n == 2 && in_n == 2)
40606 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40607 else if (out_n == 4 && in_n == 4)
40608 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40610 break;
40612 case BUILT_IN_ROUNDF:
40613 /* The round insn does not trap on denormals. */
40614 if (flag_trapping_math || !TARGET_ROUND)
40615 break;
40617 if (out_mode == SFmode && in_mode == SFmode)
40619 if (out_n == 4 && in_n == 4)
40620 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40621 else if (out_n == 8 && in_n == 8)
40622 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40624 break;
40626 case BUILT_IN_FMA:
40627 if (out_mode == DFmode && in_mode == DFmode)
40629 if (out_n == 2 && in_n == 2)
40630 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40631 if (out_n == 4 && in_n == 4)
40632 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40634 break;
40636 case BUILT_IN_FMAF:
40637 if (out_mode == SFmode && in_mode == SFmode)
40639 if (out_n == 4 && in_n == 4)
40640 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40641 if (out_n == 8 && in_n == 8)
40642 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40644 break;
40646 default:
40647 break;
40650 /* Dispatch to a handler for a vectorization library. */
40651 if (ix86_veclib_handler)
40652 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40653 type_in);
40655 return NULL_TREE;
40658 /* Handler for an SVML-style interface to
40659 a library with vectorized intrinsics. */
40661 static tree
40662 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40664 char name[20];
40665 tree fntype, new_fndecl, args;
40666 unsigned arity;
40667 const char *bname;
40668 machine_mode el_mode, in_mode;
40669 int n, in_n;
40671 /* The SVML is suitable for unsafe math only. */
40672 if (!flag_unsafe_math_optimizations)
40673 return NULL_TREE;
40675 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40676 n = TYPE_VECTOR_SUBPARTS (type_out);
40677 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40678 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40679 if (el_mode != in_mode
40680 || n != in_n)
40681 return NULL_TREE;
40683 switch (fn)
40685 case BUILT_IN_EXP:
40686 case BUILT_IN_LOG:
40687 case BUILT_IN_LOG10:
40688 case BUILT_IN_POW:
40689 case BUILT_IN_TANH:
40690 case BUILT_IN_TAN:
40691 case BUILT_IN_ATAN:
40692 case BUILT_IN_ATAN2:
40693 case BUILT_IN_ATANH:
40694 case BUILT_IN_CBRT:
40695 case BUILT_IN_SINH:
40696 case BUILT_IN_SIN:
40697 case BUILT_IN_ASINH:
40698 case BUILT_IN_ASIN:
40699 case BUILT_IN_COSH:
40700 case BUILT_IN_COS:
40701 case BUILT_IN_ACOSH:
40702 case BUILT_IN_ACOS:
40703 if (el_mode != DFmode || n != 2)
40704 return NULL_TREE;
40705 break;
40707 case BUILT_IN_EXPF:
40708 case BUILT_IN_LOGF:
40709 case BUILT_IN_LOG10F:
40710 case BUILT_IN_POWF:
40711 case BUILT_IN_TANHF:
40712 case BUILT_IN_TANF:
40713 case BUILT_IN_ATANF:
40714 case BUILT_IN_ATAN2F:
40715 case BUILT_IN_ATANHF:
40716 case BUILT_IN_CBRTF:
40717 case BUILT_IN_SINHF:
40718 case BUILT_IN_SINF:
40719 case BUILT_IN_ASINHF:
40720 case BUILT_IN_ASINF:
40721 case BUILT_IN_COSHF:
40722 case BUILT_IN_COSF:
40723 case BUILT_IN_ACOSHF:
40724 case BUILT_IN_ACOSF:
40725 if (el_mode != SFmode || n != 4)
40726 return NULL_TREE;
40727 break;
40729 default:
40730 return NULL_TREE;
40733 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40735 if (fn == BUILT_IN_LOGF)
40736 strcpy (name, "vmlsLn4");
40737 else if (fn == BUILT_IN_LOG)
40738 strcpy (name, "vmldLn2");
40739 else if (n == 4)
40741 sprintf (name, "vmls%s", bname+10);
40742 name[strlen (name)-1] = '4';
40744 else
40745 sprintf (name, "vmld%s2", bname+10);
40747 /* Convert to uppercase. */
40748 name[4] &= ~0x20;
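/* Worked example (assuming the usual "__builtin_"-prefixed names): for
   BUILT_IN_SINF with n == 4, bname + 10 is "sinf", so the buffer goes
   "vmlssinf" -> "vmlssin4" -> "vmlsSin4"; BUILT_IN_SIN with n == 2
   similarly yields "vmldSin2". */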
40750 arity = 0;
40751 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40752 args;
40753 args = TREE_CHAIN (args))
40754 arity++;
40756 if (arity == 1)
40757 fntype = build_function_type_list (type_out, type_in, NULL);
40758 else
40759 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40761 /* Build a function declaration for the vectorized function. */
40762 new_fndecl = build_decl (BUILTINS_LOCATION,
40763 FUNCTION_DECL, get_identifier (name), fntype);
40764 TREE_PUBLIC (new_fndecl) = 1;
40765 DECL_EXTERNAL (new_fndecl) = 1;
40766 DECL_IS_NOVOPS (new_fndecl) = 1;
40767 TREE_READONLY (new_fndecl) = 1;
40769 return new_fndecl;
40772 /* Handler for an ACML-style interface to
40773 a library with vectorized intrinsics. */
40775 static tree
40776 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
40778 char name[20] = "__vr.._";
40779 tree fntype, new_fndecl, args;
40780 unsigned arity;
40781 const char *bname;
40782 machine_mode el_mode, in_mode;
40783 int n, in_n;
40785 /* The ACML is 64-bit only and suitable for unsafe math only, as
40786 it does not correctly support parts of IEEE with the required
40787 precision such as denormals. */
40788 if (!TARGET_64BIT
40789 || !flag_unsafe_math_optimizations)
40790 return NULL_TREE;
40792 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40793 n = TYPE_VECTOR_SUBPARTS (type_out);
40794 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40795 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40796 if (el_mode != in_mode
40797 || n != in_n)
40798 return NULL_TREE;
40800 switch (fn)
40802 case BUILT_IN_SIN:
40803 case BUILT_IN_COS:
40804 case BUILT_IN_EXP:
40805 case BUILT_IN_LOG:
40806 case BUILT_IN_LOG2:
40807 case BUILT_IN_LOG10:
40808 name[4] = 'd';
40809 name[5] = '2';
40810 if (el_mode != DFmode
40811 || n != 2)
40812 return NULL_TREE;
40813 break;
40815 case BUILT_IN_SINF:
40816 case BUILT_IN_COSF:
40817 case BUILT_IN_EXPF:
40818 case BUILT_IN_POWF:
40819 case BUILT_IN_LOGF:
40820 case BUILT_IN_LOG2F:
40821 case BUILT_IN_LOG10F:
40822 name[4] = 's';
40823 name[5] = '4';
40824 if (el_mode != SFmode
40825 || n != 4)
40826 return NULL_TREE;
40827 break;
40829 default:
40830 return NULL_TREE;
40833 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40834 sprintf (name + 7, "%s", bname+10);
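/* Worked example (assuming the usual "__builtin_"-prefixed names): for
   BUILT_IN_SINF the "__vr.._" template becomes "__vrs4_" and "sinf" is
   appended, giving "__vrs4_sinf"; BUILT_IN_SIN yields "__vrd2_sin". */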
40836 arity = 0;
40837 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40838 args;
40839 args = TREE_CHAIN (args))
40840 arity++;
40842 if (arity == 1)
40843 fntype = build_function_type_list (type_out, type_in, NULL);
40844 else
40845 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40847 /* Build a function declaration for the vectorized function. */
40848 new_fndecl = build_decl (BUILTINS_LOCATION,
40849 FUNCTION_DECL, get_identifier (name), fntype);
40850 TREE_PUBLIC (new_fndecl) = 1;
40851 DECL_EXTERNAL (new_fndecl) = 1;
40852 DECL_IS_NOVOPS (new_fndecl) = 1;
40853 TREE_READONLY (new_fndecl) = 1;
40855 return new_fndecl;
40858 /* Returns a decl of a function that implements gather load with
40859 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
40860 Return NULL_TREE if it is not available. */
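/* For example (illustrative only): a gather of V4SFmode data with a 32-bit
   (SImode) index type and scale 4 maps to IX86_BUILTIN_GATHERSIV4SF on
   plain AVX2, or to IX86_BUILTIN_GATHER3SIV4SF when AVX512VL is enabled. */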
40862 static tree
40863 ix86_vectorize_builtin_gather (const_tree mem_vectype,
40864 const_tree index_type, int scale)
40866 bool si;
40867 enum ix86_builtins code;
40869 if (! TARGET_AVX2)
40870 return NULL_TREE;
40872 if ((TREE_CODE (index_type) != INTEGER_TYPE
40873 && !POINTER_TYPE_P (index_type))
40874 || (TYPE_MODE (index_type) != SImode
40875 && TYPE_MODE (index_type) != DImode))
40876 return NULL_TREE;
40878 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
40879 return NULL_TREE;
40881 /* v*gather* insn sign extends index to pointer mode. */
40882 if (TYPE_PRECISION (index_type) < POINTER_SIZE
40883 && TYPE_UNSIGNED (index_type))
40884 return NULL_TREE;
40886 if (scale <= 0
40887 || scale > 8
40888 || (scale & (scale - 1)) != 0)
40889 return NULL_TREE;
40891 si = TYPE_MODE (index_type) == SImode;
40892 switch (TYPE_MODE (mem_vectype))
40894 case V2DFmode:
40895 if (TARGET_AVX512VL)
40896 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
40897 else
40898 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
40899 break;
40900 case V4DFmode:
40901 if (TARGET_AVX512VL)
40902 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
40903 else
40904 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
40905 break;
40906 case V2DImode:
40907 if (TARGET_AVX512VL)
40908 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
40909 else
40910 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
40911 break;
40912 case V4DImode:
40913 if (TARGET_AVX512VL)
40914 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
40915 else
40916 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
40917 break;
40918 case V4SFmode:
40919 if (TARGET_AVX512VL)
40920 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
40921 else
40922 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
40923 break;
40924 case V8SFmode:
40925 if (TARGET_AVX512VL)
40926 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
40927 else
40928 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
40929 break;
40930 case V4SImode:
40931 if (TARGET_AVX512VL)
40932 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
40933 else
40934 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
40935 break;
40936 case V8SImode:
40937 if (TARGET_AVX512VL)
40938 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
40939 else
40940 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
40941 break;
40942 case V8DFmode:
40943 if (TARGET_AVX512F)
40944 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
40945 else
40946 return NULL_TREE;
40947 break;
40948 case V8DImode:
40949 if (TARGET_AVX512F)
40950 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
40951 else
40952 return NULL_TREE;
40953 break;
40954 case V16SFmode:
40955 if (TARGET_AVX512F)
40956 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
40957 else
40958 return NULL_TREE;
40959 break;
40960 case V16SImode:
40961 if (TARGET_AVX512F)
40962 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
40963 else
40964 return NULL_TREE;
40965 break;
40966 default:
40967 return NULL_TREE;
40970 return ix86_get_builtin (code);
40973 /* Returns the decl of a target-specific builtin that implements the
40974 reciprocal of the function FN, or NULL_TREE if not available. */
40976 static tree
40977 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
40979 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
40980 && flag_finite_math_only && !flag_trapping_math
40981 && flag_unsafe_math_optimizations))
40982 return NULL_TREE;
40984 if (md_fn)
40985 /* Machine dependent builtins. */
40986 switch (fn)
40988 /* Vectorized version of sqrt to rsqrt conversion. */
40989 case IX86_BUILTIN_SQRTPS_NR:
40990 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
40992 case IX86_BUILTIN_SQRTPS_NR256:
40993 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
40995 default:
40996 return NULL_TREE;
40998 else
40999 /* Normal builtins. */
41000 switch (fn)
41002 /* Sqrt to rsqrt conversion. */
41003 case BUILT_IN_SQRTF:
41004 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
41006 default:
41007 return NULL_TREE;
41011 /* Helper for avx_vpermilps256_operand et al. This is also used by
41012 the expansion functions to turn the parallel back into a mask.
41013 The return value is 0 for no match and the imm8+1 for a match. */
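/* Worked example (illustrative only): for a V4SFmode parallel (0 1 2 3)
   each element index is encoded in 2 bits, so the mask is
   0 | 1 << 2 | 2 << 4 | 3 << 6 = 0xe4 and the return value is 0xe4 + 1. */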
41016 int avx_vpermilp_parallel (rtx par, machine_mode mode)
41018 unsigned i, nelt = GET_MODE_NUNITS (mode);
41019 unsigned mask = 0;
41020 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
41022 if (XVECLEN (par, 0) != (int) nelt)
41023 return 0;
41025 /* Validate that all of the elements are constants, and not totally
41026 out of range. Copy the data into an integral array to make the
41027 subsequent checks easier. */
41028 for (i = 0; i < nelt; ++i)
41030 rtx er = XVECEXP (par, 0, i);
41031 unsigned HOST_WIDE_INT ei;
41033 if (!CONST_INT_P (er))
41034 return 0;
41035 ei = INTVAL (er);
41036 if (ei >= nelt)
41037 return 0;
41038 ipar[i] = ei;
41041 switch (mode)
41043 case V8DFmode:
41044 /* In the 512-bit DFmode case, we can only move elements within
41045 a 128-bit lane. First fill the second part of the mask,
41046 then fallthru. */
41047 for (i = 4; i < 6; ++i)
41049 if (ipar[i] < 4 || ipar[i] >= 6)
41050 return 0;
41051 mask |= (ipar[i] - 4) << i;
41053 for (i = 6; i < 8; ++i)
41055 if (ipar[i] < 6)
41056 return 0;
41057 mask |= (ipar[i] - 6) << i;
41059 /* FALLTHRU */
41061 case V4DFmode:
41062 /* In the 256-bit DFmode case, we can only move elements within
41063 a 128-bit lane. */
41064 for (i = 0; i < 2; ++i)
41066 if (ipar[i] >= 2)
41067 return 0;
41068 mask |= ipar[i] << i;
41070 for (i = 2; i < 4; ++i)
41072 if (ipar[i] < 2)
41073 return 0;
41074 mask |= (ipar[i] - 2) << i;
41076 break;
41078 case V16SFmode:
41079 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
41080 must mirror the permutation in the lower 256 bits. */
41081 for (i = 0; i < 8; ++i)
41082 if (ipar[i] + 8 != ipar[i + 8])
41083 return 0;
41084 /* FALLTHRU */
41086 case V8SFmode:
41087 /* In the 256-bit SFmode case, we have full freedom of
41088 movement within the low 128-bit lane, but the high 128-bit
41089 lane must mirror the exact same pattern. */
41090 for (i = 0; i < 4; ++i)
41091 if (ipar[i] + 4 != ipar[i + 4])
41092 return 0;
41093 nelt = 4;
41094 /* FALLTHRU */
41096 case V2DFmode:
41097 case V4SFmode:
41098 /* In the 128-bit case, we've full freedom in the placement of
41099 the elements from the source operand. */
41100 for (i = 0; i < nelt; ++i)
41101 mask |= ipar[i] << (i * (nelt / 2));
41102 break;
41104 default:
41105 gcc_unreachable ();
41108 /* Make sure success has a non-zero value by adding one. */
41109 return mask + 1;
41112 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
41113 the expansion functions to turn the parallel back into a mask.
41114 The return value is 0 for no match and the imm8+1 for a match. */
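/* Worked example (illustrative only): for a V4DFmode parallel (2 3 4 5),
   i.e. the high half of operand 0 followed by the low half of operand 1,
   the half selectors are 2 / 2 = 1 and 4 / 2 = 2, so the mask is
   1 | 2 << 4 = 0x21 and the return value is 0x21 + 1. */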
41117 int avx_vperm2f128_parallel (rtx par, machine_mode mode)
41119 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41120 unsigned mask = 0;
41121 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41123 if (XVECLEN (par, 0) != (int) nelt)
41124 return 0;
41126 /* Validate that all of the elements are constants, and not totally
41127 out of range. Copy the data into an integral array to make the
41128 subsequent checks easier. */
41129 for (i = 0; i < nelt; ++i)
41131 rtx er = XVECEXP (par, 0, i);
41132 unsigned HOST_WIDE_INT ei;
41134 if (!CONST_INT_P (er))
41135 return 0;
41136 ei = INTVAL (er);
41137 if (ei >= 2 * nelt)
41138 return 0;
41139 ipar[i] = ei;
41142 /* Validate that each half of the permute selects a whole, contiguous half. */
41143 for (i = 0; i < nelt2 - 1; ++i)
41144 if (ipar[i] + 1 != ipar[i + 1])
41145 return 0;
41146 for (i = nelt2; i < nelt - 1; ++i)
41147 if (ipar[i] + 1 != ipar[i + 1])
41148 return 0;
41150 /* Reconstruct the mask. */
41151 for (i = 0; i < 2; ++i)
41153 unsigned e = ipar[i * nelt2];
41154 if (e % nelt2)
41155 return 0;
41156 e /= nelt2;
41157 mask |= e << (i * 4);
41160 /* Make sure success has a non-zero value by adding one. */
41161 return mask + 1;
41164 /* Return a register priority for hard reg REGNO. */
41165 static int
41166 ix86_register_priority (int hard_regno)
41168 /* ebp and r13 as the base always want a displacement, and r12 as the
41169 base always wants an index. So discourage their usage in an
41170 address. */
41171 if (hard_regno == R12_REG || hard_regno == R13_REG)
41172 return 0;
41173 if (hard_regno == BP_REG)
41174 return 1;
41175 /* New x86-64 int registers result in bigger code size. Discourage
41176 them. */
41177 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41178 return 2;
41179 /* New x86-64 SSE registers result in bigger code size. Discourage
41180 them. */
41181 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41182 return 2;
41183 /* Usage of AX register results in smaller code. Prefer it. */
41184 if (hard_regno == AX_REG)
41185 return 4;
41186 return 3;
41189 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41191 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41192 QImode must go into class Q_REGS.
41193 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
41194 movdf to do mem-to-mem moves through integer regs. */
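/* For example (illustrative only): reloading the nonzero constant
   (const_double:DF 1.0) into SSE_REGS returns NO_REGS, forcing the constant
   into memory, while reloading it into FLOAT_REGS on an 80387 target that
   is not using SSE math keeps FLOAT_REGS, since fld1 can materialize it. */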
41196 static reg_class_t
41197 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41199 machine_mode mode = GET_MODE (x);
41201 /* We're only allowed to return a subclass of CLASS. Many of the
41202 following checks fail for NO_REGS, so eliminate that early. */
41203 if (regclass == NO_REGS)
41204 return NO_REGS;
41206 /* All classes can load zeros. */
41207 if (x == CONST0_RTX (mode))
41208 return regclass;
41210 /* Force constants into memory if we are loading a (nonzero) constant into
41211 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41212 instructions to load from a constant. */
41213 if (CONSTANT_P (x)
41214 && (MAYBE_MMX_CLASS_P (regclass)
41215 || MAYBE_SSE_CLASS_P (regclass)
41216 || MAYBE_MASK_CLASS_P (regclass)))
41217 return NO_REGS;
41219 /* Prefer SSE regs only, if we can use them for math. */
41220 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41221 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41223 /* Floating-point constants need more complex checks. */
41224 if (CONST_DOUBLE_P (x))
41226 /* General regs can load everything. */
41227 if (reg_class_subset_p (regclass, GENERAL_REGS))
41228 return regclass;
41230 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41231 zero above. We only want to wind up preferring 80387 registers if
41232 we plan on doing computation with them. */
41233 if (TARGET_80387
41234 && standard_80387_constant_p (x) > 0)
41236 /* Limit class to non-sse. */
41237 if (regclass == FLOAT_SSE_REGS)
41238 return FLOAT_REGS;
41239 if (regclass == FP_TOP_SSE_REGS)
41240 return FP_TOP_REG;
41241 if (regclass == FP_SECOND_SSE_REGS)
41242 return FP_SECOND_REG;
41243 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41244 return regclass;
41247 return NO_REGS;
41250 /* Generally when we see PLUS here, it's the function invariant
41251 (plus soft-fp const_int), which can only be computed into general
41252 regs. */
41253 if (GET_CODE (x) == PLUS)
41254 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41256 /* QImode constants are easy to load, but non-constant QImode data
41257 must go into Q_REGS. */
41258 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41260 if (reg_class_subset_p (regclass, Q_REGS))
41261 return regclass;
41262 if (reg_class_subset_p (Q_REGS, regclass))
41263 return Q_REGS;
41264 return NO_REGS;
41267 return regclass;
41270 /* Discourage putting floating-point values in SSE registers unless
41271 SSE math is being used, and likewise for the 387 registers. */
41272 static reg_class_t
41273 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41275 machine_mode mode = GET_MODE (x);
41277 /* Restrict the output reload class to the register bank that we are doing
41278 math on. If we would like not to return a subset of CLASS, reject this
41279 alternative: if reload cannot do this, it will still use its choice. */
41280 mode = GET_MODE (x);
41281 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41282 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41284 if (X87_FLOAT_MODE_P (mode))
41286 if (regclass == FP_TOP_SSE_REGS)
41287 return FP_TOP_REG;
41288 else if (regclass == FP_SECOND_SSE_REGS)
41289 return FP_SECOND_REG;
41290 else
41291 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41294 return regclass;
41297 static reg_class_t
41298 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41299 machine_mode mode, secondary_reload_info *sri)
41301 /* Double-word spills from general registers to non-offsettable memory
41302 references (zero-extended addresses) require special handling. */
41303 if (TARGET_64BIT
41304 && MEM_P (x)
41305 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41306 && INTEGER_CLASS_P (rclass)
41307 && !offsettable_memref_p (x))
41309 sri->icode = (in_p
41310 ? CODE_FOR_reload_noff_load
41311 : CODE_FOR_reload_noff_store);
41312 /* Add the cost of moving address to a temporary. */
41313 sri->extra_cost = 1;
41315 return NO_REGS;
41318 /* QImode spills from non-QI registers require an
41319 intermediate register on 32bit targets. */
41320 if (mode == QImode
41321 && (MAYBE_MASK_CLASS_P (rclass)
41322 || (!TARGET_64BIT && !in_p
41323 && INTEGER_CLASS_P (rclass)
41324 && MAYBE_NON_Q_CLASS_P (rclass))))
41326 int regno;
41328 if (REG_P (x))
41329 regno = REGNO (x);
41330 else
41331 regno = -1;
41333 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41334 regno = true_regnum (x);
41336 /* Return Q_REGS if the operand is in memory. */
41337 if (regno == -1)
41338 return Q_REGS;
41341 /* This condition handles corner case where an expression involving
41342 pointers gets vectorized. We're trying to use the address of a
41343 stack slot as a vector initializer.
41345 (set (reg:V2DI 74 [ vect_cst_.2 ])
41346 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41348 Eventually frame gets turned into sp+offset like this:
41350 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41351 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41352 (const_int 392 [0x188]))))
41354 That later gets turned into:
41356 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41357 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41358 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41360 We'll have the following reload recorded:
41362 Reload 0: reload_in (DI) =
41363 (plus:DI (reg/f:DI 7 sp)
41364 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41365 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41366 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41367 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41368 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41369 reload_reg_rtx: (reg:V2DI 22 xmm1)
41371 Which isn't going to work since SSE instructions can't handle scalar
41372 additions. Returning GENERAL_REGS forces the addition into integer
41373 register and reload can handle subsequent reloads without problems. */
41375 if (in_p && GET_CODE (x) == PLUS
41376 && SSE_CLASS_P (rclass)
41377 && SCALAR_INT_MODE_P (mode))
41378 return GENERAL_REGS;
41380 return NO_REGS;
41383 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41385 static bool
41386 ix86_class_likely_spilled_p (reg_class_t rclass)
41388 switch (rclass)
41390 case AREG:
41391 case DREG:
41392 case CREG:
41393 case BREG:
41394 case AD_REGS:
41395 case SIREG:
41396 case DIREG:
41397 case SSE_FIRST_REG:
41398 case FP_TOP_REG:
41399 case FP_SECOND_REG:
41400 case BND_REGS:
41401 return true;
41403 default:
41404 break;
41407 return false;
41410 /* If we are copying between general and FP registers, we need a memory
41411 location. The same is true for SSE and MMX registers.
41413 To optimize register_move_cost performance, allow inline variant.
41415 The macro can't work reliably when one of the CLASSES is a class containing
41416 registers from multiple units (SSE, MMX, integer). We avoid this by never
41417 combining those units in single alternative in the machine description.
41418 Ensure that this constraint holds to avoid unexpected surprises.
41420 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41421 enforce these sanity checks. */
41423 static inline bool
41424 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41425 machine_mode mode, int strict)
41427 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41428 return false;
41429 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41430 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41431 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41432 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41433 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41434 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41436 gcc_assert (!strict || lra_in_progress);
41437 return true;
41440 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41441 return true;
41443 /* Between mask and general, we have moves no larger than word size. */
41444 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41445 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41446 return true;
41448 /* ??? This is a lie. We do have moves between mmx/general, and for
41449 mmx/sse2. But by saying we need secondary memory we discourage the
41450 register allocator from using the mmx registers unless needed. */
41451 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41452 return true;
41454 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41456 /* SSE1 doesn't have any direct moves from other classes. */
41457 if (!TARGET_SSE2)
41458 return true;
41460 /* If the target says that inter-unit moves are more expensive
41461 than moving through memory, then don't generate them. */
41462 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41463 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41464 return true;
41466 /* Between SSE and general, we have moves no larger than word size. */
41467 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41468 return true;
41471 return false;
41474 bool
41475 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41476 machine_mode mode, int strict)
41478 return inline_secondary_memory_needed (class1, class2, mode, strict);
41481 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41483 On the 80386, this is the size of MODE in words,
41484 except in the FP regs, where a single reg is always enough. */
41486 static unsigned char
41487 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41489 if (MAYBE_INTEGER_CLASS_P (rclass))
41491 if (mode == XFmode)
41492 return (TARGET_64BIT ? 2 : 3);
41493 else if (mode == XCmode)
41494 return (TARGET_64BIT ? 4 : 6);
41495 else
41496 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41498 else
41500 if (COMPLEX_MODE_P (mode))
41501 return 2;
41502 else
41503 return 1;
41507 /* Return true if the registers in CLASS cannot represent the change from
41508 modes FROM to TO. */
41510 bool
41511 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41512 enum reg_class regclass)
41514 if (from == to)
41515 return false;
41517 /* x87 registers can't do subreg at all, as all values are reformatted
41518 to extended precision. */
41519 if (MAYBE_FLOAT_CLASS_P (regclass))
41520 return true;
41522 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41524 /* Vector registers do not support QI or HImode loads. If we don't
41525 disallow a change to these modes, reload will assume it's ok to
41526 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41527 the vec_dupv4hi pattern. */
41528 if (GET_MODE_SIZE (from) < 4)
41529 return true;
41532 return false;
41535 /* Return the cost of moving data of mode M between a
41536 register and memory. A value of 2 is the default; this cost is
41537 relative to those in `REGISTER_MOVE_COST'.
41539 This function is used extensively by register_move_cost that is used to
41540 build tables at startup. Make it inline in this case.
41541 When IN is 2, return maximum of in and out move cost.
41543 If moving between registers and memory is more expensive than
41544 between two registers, you should define this macro to express the
41545 relative cost.
41547 Also model the increased cost of moving QImode registers in
41548 non-Q_REGS classes. */
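/* For instance (illustrative only): inline_memory_move_cost (SFmode,
   FLOAT_REGS, 2) returns MAX (ix86_cost->fp_load[0], ix86_cost->fp_store[0]),
   whereas IN == 1 returns only the load cost and IN == 0 only the store
   cost. */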
41550 static inline int
41551 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41552 int in)
41554 int cost;
41555 if (FLOAT_CLASS_P (regclass))
41557 int index;
41558 switch (mode)
41560 case SFmode:
41561 index = 0;
41562 break;
41563 case DFmode:
41564 index = 1;
41565 break;
41566 case XFmode:
41567 index = 2;
41568 break;
41569 default:
41570 return 100;
41572 if (in == 2)
41573 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41574 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41576 if (SSE_CLASS_P (regclass))
41578 int index;
41579 switch (GET_MODE_SIZE (mode))
41581 case 4:
41582 index = 0;
41583 break;
41584 case 8:
41585 index = 1;
41586 break;
41587 case 16:
41588 index = 2;
41589 break;
41590 default:
41591 return 100;
41593 if (in == 2)
41594 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41595 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41597 if (MMX_CLASS_P (regclass))
41599 int index;
41600 switch (GET_MODE_SIZE (mode))
41602 case 4:
41603 index = 0;
41604 break;
41605 case 8:
41606 index = 1;
41607 break;
41608 default:
41609 return 100;
41611 if (in == 2)
41612 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41613 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41615 switch (GET_MODE_SIZE (mode))
41617 case 1:
41618 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41620 if (!in)
41621 return ix86_cost->int_store[0];
41622 if (TARGET_PARTIAL_REG_DEPENDENCY
41623 && optimize_function_for_speed_p (cfun))
41624 cost = ix86_cost->movzbl_load;
41625 else
41626 cost = ix86_cost->int_load[0];
41627 if (in == 2)
41628 return MAX (cost, ix86_cost->int_store[0]);
41629 return cost;
41631 else
41633 if (in == 2)
41634 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41635 if (in)
41636 return ix86_cost->movzbl_load;
41637 else
41638 return ix86_cost->int_store[0] + 4;
41640 break;
41641 case 2:
41642 if (in == 2)
41643 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41644 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41645 default:
41646 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41647 if (mode == TFmode)
41648 mode = XFmode;
41649 if (in == 2)
41650 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41651 else if (in)
41652 cost = ix86_cost->int_load[2];
41653 else
41654 cost = ix86_cost->int_store[2];
41655 return (cost * (((int) GET_MODE_SIZE (mode)
41656 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
41660 static int
41661 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41662 bool in)
41664 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41668 /* Return the cost of moving data from a register in class CLASS1 to
41669 one in class CLASS2.
41671 It is not required that the cost always equal 2 when FROM is the same as TO;
41672 on some machines it is expensive to move between registers if they are not
41673 general registers. */
41675 static int
41676 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41677 reg_class_t class2_i)
41679 enum reg_class class1 = (enum reg_class) class1_i;
41680 enum reg_class class2 = (enum reg_class) class2_i;
41682 /* In case we require secondary memory, compute cost of the store followed
41683 by load. In order to avoid bad register allocation choices, we need
41684 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
41686 if (inline_secondary_memory_needed (class1, class2, mode, 0))
41688 int cost = 1;
41690 cost += inline_memory_move_cost (mode, class1, 2);
41691 cost += inline_memory_move_cost (mode, class2, 2);
41693 /* In case of copying from a general purpose register we may emit multiple
41694 stores followed by a single load, causing a memory size mismatch stall.
41695 Count this as an arbitrarily high cost of 20. */
41696 if (targetm.class_max_nregs (class1, mode)
41697 > targetm.class_max_nregs (class2, mode))
41698 cost += 20;
41700 /* In the case of FP/MMX moves, the registers actually overlap, and we
41701 have to switch modes in order to treat them differently. */
41702 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41703 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41704 cost += 20;
41706 return cost;
41709 /* Moves between SSE/MMX and integer unit are expensive. */
41710 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41711 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41713 /* ??? By keeping returned value relatively high, we limit the number
41714 of moves between integer and MMX/SSE registers for all targets.
41715 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
41716 where integer modes in MMX/SSE registers are not tieable
41717 because of missing QImode and HImode moves to, from or between
41718 MMX/SSE registers. */
41719 return MAX (8, ix86_cost->mmxsse_to_integer);
41721 if (MAYBE_FLOAT_CLASS_P (class1))
41722 return ix86_cost->fp_move;
41723 if (MAYBE_SSE_CLASS_P (class1))
41724 return ix86_cost->sse_move;
41725 if (MAYBE_MMX_CLASS_P (class1))
41726 return ix86_cost->mmx_move;
41727 return 2;
41730 /* Return TRUE if hard register REGNO can hold a value of machine-mode
41731 MODE. */
41733 bool
41734 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
41736 /* The flags register, and only the flags register, can hold CCmode values. */
41737 if (CC_REGNO_P (regno))
41738 return GET_MODE_CLASS (mode) == MODE_CC;
41739 if (GET_MODE_CLASS (mode) == MODE_CC
41740 || GET_MODE_CLASS (mode) == MODE_RANDOM
41741 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
41742 return false;
41743 if (STACK_REGNO_P (regno))
41744 return VALID_FP_MODE_P (mode);
41745 if (MASK_REGNO_P (regno))
41746 return (VALID_MASK_REG_MODE (mode)
41747 || (TARGET_AVX512BW
41748 && VALID_MASK_AVX512BW_MODE (mode)));
41749 if (BND_REGNO_P (regno))
41750 return VALID_BND_REG_MODE (mode);
41751 if (SSE_REGNO_P (regno))
41753 /* We implement the move patterns for all vector modes into and
41754 out of SSE registers, even when no operation instructions
41755 are available. */
41757 /* For AVX-512 we allow, regardless of regno:
41758 - XI mode
41759 - any of 512-bit wide vector mode
41760 - any scalar mode. */
41761 if (TARGET_AVX512F
41762 && (mode == XImode
41763 || VALID_AVX512F_REG_MODE (mode)
41764 || VALID_AVX512F_SCALAR_MODE (mode)))
41765 return true;
41767 /* TODO check for QI/HI scalars. */
41768 /* AVX512VL allows SSE regs 16+ for 128/256-bit modes. */
41769 if (TARGET_AVX512VL
41770 && (mode == OImode
41771 || mode == TImode
41772 || VALID_AVX256_REG_MODE (mode)
41773 || VALID_AVX512VL_128_REG_MODE (mode)))
41774 return true;
41776 /* xmm16-xmm31 are only available for AVX-512. */
41777 if (EXT_REX_SSE_REGNO_P (regno))
41778 return false;
41780 /* OImode and AVX modes are available only when AVX is enabled. */
41781 return ((TARGET_AVX
41782 && VALID_AVX256_REG_OR_OI_MODE (mode))
41783 || VALID_SSE_REG_MODE (mode)
41784 || VALID_SSE2_REG_MODE (mode)
41785 || VALID_MMX_REG_MODE (mode)
41786 || VALID_MMX_REG_MODE_3DNOW (mode));
41788 if (MMX_REGNO_P (regno))
41790 /* We implement the move patterns for 3DNOW modes even in MMX mode,
41791 so if the register is available at all, then we can move data of
41792 the given mode into or out of it. */
41793 return (VALID_MMX_REG_MODE (mode)
41794 || VALID_MMX_REG_MODE_3DNOW (mode));
41797 if (mode == QImode)
41799 /* Take care for QImode values - they can be in non-QI regs,
41800 but then they do cause partial register stalls. */
41801 if (ANY_QI_REGNO_P (regno))
41802 return true;
41803 if (!TARGET_PARTIAL_REG_STALL)
41804 return true;
41805 /* LRA checks if the hard register is OK for the given mode.
41806 QImode values can live in non-QI regs, so we allow all
41807 registers here. */
41808 if (lra_in_progress)
41809 return true;
41810 return !can_create_pseudo_p ();
41812 /* We handle both integers and floats in the general purpose registers. */
41813 else if (VALID_INT_MODE_P (mode))
41814 return true;
41815 else if (VALID_FP_MODE_P (mode))
41816 return true;
41817 else if (VALID_DFP_MODE_P (mode))
41818 return true;
41819 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
41820 on to use that value in smaller contexts, this can easily force a
41821 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
41822 supporting DImode, allow it. */
41823 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
41824 return true;
41826 return false;
41829 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
41830 tieable integer mode. */
41832 static bool
41833 ix86_tieable_integer_mode_p (machine_mode mode)
41835 switch (mode)
41837 case HImode:
41838 case SImode:
41839 return true;
41841 case QImode:
41842 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
41844 case DImode:
41845 return TARGET_64BIT;
41847 default:
41848 return false;
41852 /* Return true if MODE1 is accessible in a register that can hold MODE2
41853 without copying. That is, all register classes that can hold MODE2
41854 can also hold MODE1. */
41856 bool
41857 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
41859 if (mode1 == mode2)
41860 return true;
41862 if (ix86_tieable_integer_mode_p (mode1)
41863 && ix86_tieable_integer_mode_p (mode2))
41864 return true;
41866 /* MODE2 being XFmode implies fp stack or general regs, which means we
41867 can tie any smaller floating point modes to it. Note that we do not
41868 tie this with TFmode. */
41869 if (mode2 == XFmode)
41870 return mode1 == SFmode || mode1 == DFmode;
41872 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
41873 that we can tie it with SFmode. */
41874 if (mode2 == DFmode)
41875 return mode1 == SFmode;
41877 /* If MODE2 is only appropriate for an SSE register, then tie with
41878 any other mode acceptable to SSE registers. */
41879 if (GET_MODE_SIZE (mode2) == 32
41880 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41881 return (GET_MODE_SIZE (mode1) == 32
41882 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41883 if (GET_MODE_SIZE (mode2) == 16
41884 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41885 return (GET_MODE_SIZE (mode1) == 16
41886 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41888 /* If MODE2 is appropriate for an MMX register, then tie
41889 with any other mode acceptable to MMX registers. */
41890 if (GET_MODE_SIZE (mode2) == 8
41891 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
41892 return (GET_MODE_SIZE (mode1) == 8
41893 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
41895 return false;
41898 /* Return the cost of moving between two registers of mode MODE. */
41900 static int
41901 ix86_set_reg_reg_cost (machine_mode mode)
41903 unsigned int units = UNITS_PER_WORD;
41905 switch (GET_MODE_CLASS (mode))
41907 default:
41908 break;
41910 case MODE_CC:
41911 units = GET_MODE_SIZE (CCmode);
41912 break;
41914 case MODE_FLOAT:
41915 if ((TARGET_SSE && mode == TFmode)
41916 || (TARGET_80387 && mode == XFmode)
41917 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
41918 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
41919 units = GET_MODE_SIZE (mode);
41920 break;
41922 case MODE_COMPLEX_FLOAT:
41923 if ((TARGET_SSE && mode == TCmode)
41924 || (TARGET_80387 && mode == XCmode)
41925 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
41926 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
41927 units = GET_MODE_SIZE (mode);
41928 break;
41930 case MODE_VECTOR_INT:
41931 case MODE_VECTOR_FLOAT:
41932 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41933 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41934 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41935 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41936 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
41937 units = GET_MODE_SIZE (mode);
41940 /* Return the cost of moving between two registers of mode MODE,
41941 assuming that the move will be in pieces of at most UNITS bytes. */
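/* For instance (illustrative only): on x86-64 with AVX enabled, a 32-byte
   V8SFmode copy moves as one 32-byte piece, COSTS_N_INSNS (1), while a
   32-byte value confined to 8-byte general registers would cost
   COSTS_N_INSNS (4). */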
41942 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
41945 /* Compute a (partial) cost for rtx X. Return true if the complete
41946 cost has been computed, and false if subexpressions should be
41947 scanned. In either case, *TOTAL contains the cost result. */
41949 static bool
41950 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
41951 bool speed)
41953 rtx mask;
41954 enum rtx_code code = (enum rtx_code) code_i;
41955 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
41956 machine_mode mode = GET_MODE (x);
41957 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
41959 switch (code)
41961 case SET:
41962 if (register_operand (SET_DEST (x), VOIDmode)
41963 && reg_or_0_operand (SET_SRC (x), VOIDmode))
41965 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
41966 return true;
41968 return false;
41970 case CONST_INT:
41971 case CONST:
41972 case LABEL_REF:
41973 case SYMBOL_REF:
41974 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
41975 *total = 3;
41976 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
41977 *total = 2;
41978 else if (flag_pic && SYMBOLIC_CONST (x)
41979 && !(TARGET_64BIT
41980 && (GET_CODE (x) == LABEL_REF
41981 || (GET_CODE (x) == SYMBOL_REF
41982 && SYMBOL_REF_LOCAL_P (x)))))
41983 *total = 1;
41984 else
41985 *total = 0;
41986 return true;
41988 case CONST_WIDE_INT:
41989 *total = 0;
41990 return true;
41992 case CONST_DOUBLE:
41993 switch (standard_80387_constant_p (x))
41995 case 1: /* 0.0 */
41996 *total = 1;
41997 return true;
41998 default: /* Other constants */
41999 *total = 2;
42000 return true;
42001 case 0:
42002 case -1:
42003 break;
42005 if (SSE_FLOAT_MODE_P (mode))
42007 case CONST_VECTOR:
42008 switch (standard_sse_constant_p (x))
42010 case 0:
42011 break;
42012 case 1: /* 0: xor eliminates false dependency */
42013 *total = 0;
42014 return true;
42015 default: /* -1: cmp contains false dependency */
42016 *total = 1;
42017 return true;
42020 /* Fall back to (MEM (SYMBOL_REF)), since that's where
42021 it'll probably end up. Add a penalty for size. */
42022 *total = (COSTS_N_INSNS (1)
42023 + (flag_pic != 0 && !TARGET_64BIT)
42024 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
42025 return true;
42027 case ZERO_EXTEND:
42028 /* The zero extension is often completely free on x86_64, so make
42029 it as cheap as possible. */
42030 if (TARGET_64BIT && mode == DImode
42031 && GET_MODE (XEXP (x, 0)) == SImode)
42032 *total = 1;
42033 else if (TARGET_ZERO_EXTEND_WITH_AND)
42034 *total = cost->add;
42035 else
42036 *total = cost->movzx;
42037 return false;
42039 case SIGN_EXTEND:
42040 *total = cost->movsx;
42041 return false;
42043 case ASHIFT:
42044 if (SCALAR_INT_MODE_P (mode)
42045 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
42046 && CONST_INT_P (XEXP (x, 1)))
42048 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42049 if (value == 1)
42051 *total = cost->add;
42052 return false;
42054 if ((value == 2 || value == 3)
42055 && cost->lea <= cost->shift_const)
42057 *total = cost->lea;
42058 return false;
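/* Illustrative: a left shift by 1 is costed like an add (it can be done
   as "add reg,reg"), and shifts by 2 or 3 may be costed as an lea using
   scale factors 4 or 8, when lea is no more expensive than a
   shift-by-constant.  */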
42061 /* FALLTHRU */
42063 case ROTATE:
42064 case ASHIFTRT:
42065 case LSHIFTRT:
42066 case ROTATERT:
42067 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42069 /* ??? Should be SSE vector operation cost. */
42070 /* At least for published AMD latencies, this really is the same
42071 as the latency for a simple fpu operation like fabs. */
42072 /* V*QImode is emulated with 1-11 insns. */
42073 if (mode == V16QImode || mode == V32QImode)
42075 int count = 11;
42076 if (TARGET_XOP && mode == V16QImode)
42078 /* For XOP we use vpshab, which requires a broadcast of the
42079 value to the variable shift insn. For constants this
42080 means a V16QImode const in mem; even when we can perform the
42081 shift with one insn, set the cost to prefer paddb. */
42082 if (CONSTANT_P (XEXP (x, 1)))
42084 *total = (cost->fabs
42085 + rtx_cost (XEXP (x, 0), code, 0, speed)
42086 + (speed ? 2 : COSTS_N_BYTES (16)));
42087 return true;
42089 count = 3;
42091 else if (TARGET_SSSE3)
42092 count = 7;
42093 *total = cost->fabs * count;
42095 else
42096 *total = cost->fabs;
42098 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42100 if (CONST_INT_P (XEXP (x, 1)))
42102 if (INTVAL (XEXP (x, 1)) > 32)
42103 *total = cost->shift_const + COSTS_N_INSNS (2);
42104 else
42105 *total = cost->shift_const * 2;
42107 else
42109 if (GET_CODE (XEXP (x, 1)) == AND)
42110 *total = cost->shift_var * 2;
42111 else
42112 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42115 else
42117 if (CONST_INT_P (XEXP (x, 1)))
42118 *total = cost->shift_const;
42119 else if (GET_CODE (XEXP (x, 1)) == SUBREG
42120 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42122 /* Return the cost after shift-and truncation. */
42123 *total = cost->shift_var;
42124 return true;
42126 else
42127 *total = cost->shift_var;
42129 return false;
42131 case FMA:
42133 rtx sub;
42135 gcc_assert (FLOAT_MODE_P (mode));
42136 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42138 /* ??? SSE scalar/vector cost should be used here. */
42139 /* ??? Bald assumption that fma has the same cost as fmul. */
42140 *total = cost->fmul;
42141 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42143 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
42144 sub = XEXP (x, 0);
42145 if (GET_CODE (sub) == NEG)
42146 sub = XEXP (sub, 0);
42147 *total += rtx_cost (sub, FMA, 0, speed);
42149 sub = XEXP (x, 2);
42150 if (GET_CODE (sub) == NEG)
42151 sub = XEXP (sub, 0);
42152 *total += rtx_cost (sub, FMA, 2, speed);
42153 return true;
42156 case MULT:
42157 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42159 /* ??? SSE scalar cost should be used here. */
42160 *total = cost->fmul;
42161 return false;
42163 else if (X87_FLOAT_MODE_P (mode))
42165 *total = cost->fmul;
42166 return false;
42168 else if (FLOAT_MODE_P (mode))
42170 /* ??? SSE vector cost should be used here. */
42171 *total = cost->fmul;
42172 return false;
42174 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42176 /* V*QImode is emulated with 7-13 insns. */
42177 if (mode == V16QImode || mode == V32QImode)
42179 int extra = 11;
42180 if (TARGET_XOP && mode == V16QImode)
42181 extra = 5;
42182 else if (TARGET_SSSE3)
42183 extra = 6;
42184 *total = cost->fmul * 2 + cost->fabs * extra;
42186 /* V*DImode is emulated with 5-8 insns. */
42187 else if (mode == V2DImode || mode == V4DImode)
42189 if (TARGET_XOP && mode == V2DImode)
42190 *total = cost->fmul * 2 + cost->fabs * 3;
42191 else
42192 *total = cost->fmul * 3 + cost->fabs * 5;
42194 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42195 insns, including two PMULUDQ. */
42196 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42197 *total = cost->fmul * 2 + cost->fabs * 5;
42198 else
42199 *total = cost->fmul;
42200 return false;
42202 else
42204 rtx op0 = XEXP (x, 0);
42205 rtx op1 = XEXP (x, 1);
42206 int nbits;
42207 if (CONST_INT_P (XEXP (x, 1)))
42209 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42210 for (nbits = 0; value != 0; value &= value - 1)
42211 nbits++;
42213 else
42214 /* This is arbitrary. */
42215 nbits = 7;
42217 /* Compute costs correctly for widening multiplication. */
42218 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42219 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42220 == GET_MODE_SIZE (mode))
42222 int is_mulwiden = 0;
42223 machine_mode inner_mode = GET_MODE (op0);
42225 if (GET_CODE (op0) == GET_CODE (op1))
42226 is_mulwiden = 1, op1 = XEXP (op1, 0);
42227 else if (CONST_INT_P (op1))
42229 if (GET_CODE (op0) == SIGN_EXTEND)
42230 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42231 == INTVAL (op1);
42232 else
42233 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42236 if (is_mulwiden)
42237 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42240 *total = (cost->mult_init[MODE_INDEX (mode)]
42241 + nbits * cost->mult_bit
42242 + rtx_cost (op0, outer_code, opno, speed)
42243 + rtx_cost (op1, outer_code, opno, speed));
42245 return true;
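/* Illustrative example for the general integer case above: the
   "value &= value - 1" loop clears one set bit per iteration
   (Kernighan's popcount), so a constant multiplier of 10 (binary 1010)
   gives nbits == 2 and a cost of mult_init + 2 * mult_bit plus the
   costs of the two operands.  */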
42248 case DIV:
42249 case UDIV:
42250 case MOD:
42251 case UMOD:
42252 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42253 /* ??? SSE cost should be used here. */
42254 *total = cost->fdiv;
42255 else if (X87_FLOAT_MODE_P (mode))
42256 *total = cost->fdiv;
42257 else if (FLOAT_MODE_P (mode))
42258 /* ??? SSE vector cost should be used here. */
42259 *total = cost->fdiv;
42260 else
42261 *total = cost->divide[MODE_INDEX (mode)];
42262 return false;
42264 case PLUS:
42265 if (GET_MODE_CLASS (mode) == MODE_INT
42266 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42268 if (GET_CODE (XEXP (x, 0)) == PLUS
42269 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42270 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42271 && CONSTANT_P (XEXP (x, 1)))
42273 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42274 if (val == 2 || val == 4 || val == 8)
42276 *total = cost->lea;
42277 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42278 outer_code, opno, speed);
42279 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42280 outer_code, opno, speed);
42281 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42282 return true;
42285 else if (GET_CODE (XEXP (x, 0)) == MULT
42286 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42288 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42289 if (val == 2 || val == 4 || val == 8)
42291 *total = cost->lea;
42292 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42293 outer_code, opno, speed);
42294 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42295 return true;
42298 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42300 *total = cost->lea;
42301 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42302 outer_code, opno, speed);
42303 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42304 outer_code, opno, speed);
42305 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42306 return true;
42309 /* FALLTHRU */
42311 case MINUS:
42312 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42314 /* ??? SSE cost should be used here. */
42315 *total = cost->fadd;
42316 return false;
42318 else if (X87_FLOAT_MODE_P (mode))
42320 *total = cost->fadd;
42321 return false;
42323 else if (FLOAT_MODE_P (mode))
42325 /* ??? SSE vector cost should be used here. */
42326 *total = cost->fadd;
42327 return false;
42329 /* FALLTHRU */
42331 case AND:
42332 case IOR:
42333 case XOR:
42334 if (GET_MODE_CLASS (mode) == MODE_INT
42335 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42337 *total = (cost->add * 2
42338 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42339 << (GET_MODE (XEXP (x, 0)) != DImode))
42340 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42341 << (GET_MODE (XEXP (x, 1)) != DImode)));
42342 return true;
42344 /* FALLTHRU */
42346 case NEG:
42347 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42349 /* ??? SSE cost should be used here. */
42350 *total = cost->fchs;
42351 return false;
42353 else if (X87_FLOAT_MODE_P (mode))
42355 *total = cost->fchs;
42356 return false;
42358 else if (FLOAT_MODE_P (mode))
42360 /* ??? SSE vector cost should be used here. */
42361 *total = cost->fchs;
42362 return false;
42364 /* FALLTHRU */
42366 case NOT:
42367 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42369 /* ??? Should be SSE vector operation cost. */
42370 /* At least for published AMD latencies, this really is the same
42371 as the latency for a simple fpu operation like fabs. */
42372 *total = cost->fabs;
42374 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42375 *total = cost->add * 2;
42376 else
42377 *total = cost->add;
42378 return false;
42380 case COMPARE:
42381 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42382 && XEXP (XEXP (x, 0), 1) == const1_rtx
42383 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42384 && XEXP (x, 1) == const0_rtx)
42386 /* This kind of construct is implemented using test[bwl].
42387 Treat it as if we had an AND. */
42388 *total = (cost->add
42389 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42390 + rtx_cost (const1_rtx, outer_code, opno, speed));
42391 return true;
42393 return false;
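/* Illustrative: a pattern of the form (compare (zero_extract X 1 N) 0)
   corresponds to testing bit N, e.g. "testl $8, %eax" for N == 3,
   which is why it is costed like an AND above.  */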
42395 case FLOAT_EXTEND:
42396 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42397 *total = 0;
42398 return false;
42400 case ABS:
42401 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42402 /* ??? SSE cost should be used here. */
42403 *total = cost->fabs;
42404 else if (X87_FLOAT_MODE_P (mode))
42405 *total = cost->fabs;
42406 else if (FLOAT_MODE_P (mode))
42407 /* ??? SSE vector cost should be used here. */
42408 *total = cost->fabs;
42409 return false;
42411 case SQRT:
42412 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42413 /* ??? SSE cost should be used here. */
42414 *total = cost->fsqrt;
42415 else if (X87_FLOAT_MODE_P (mode))
42416 *total = cost->fsqrt;
42417 else if (FLOAT_MODE_P (mode))
42418 /* ??? SSE vector cost should be used here. */
42419 *total = cost->fsqrt;
42420 return false;
42422 case UNSPEC:
42423 if (XINT (x, 1) == UNSPEC_TP)
42424 *total = 0;
42425 return false;
42427 case VEC_SELECT:
42428 case VEC_CONCAT:
42429 case VEC_DUPLICATE:
42430 /* ??? Assume all of these vector manipulation patterns are
42431 recognizable, in which case they all pretty much have the
42432 same cost. */
42433 *total = cost->fabs;
42434 return true;
42435 case VEC_MERGE:
42436 mask = XEXP (x, 2);
42437 /* This is a masked instruction; assume the same cost as
42438 the nonmasked variant. */
42439 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42440 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42441 else
42442 *total = cost->fabs;
42443 return true;
42445 default:
42446 return false;
42450 #if TARGET_MACHO
42452 static int current_machopic_label_num;
42454 /* Given a symbol name and its associated stub, write out the
42455 definition of the stub. */
42457 void
42458 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42460 unsigned int length;
42461 char *binder_name, *symbol_name, lazy_ptr_name[32];
42462 int label = ++current_machopic_label_num;
42464 /* For 64-bit we shouldn't get here. */
42465 gcc_assert (!TARGET_64BIT);
42467 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42468 symb = targetm.strip_name_encoding (symb);
42470 length = strlen (stub);
42471 binder_name = XALLOCAVEC (char, length + 32);
42472 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42474 length = strlen (symb);
42475 symbol_name = XALLOCAVEC (char, length + 32);
42476 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42478 sprintf (lazy_ptr_name, "L%d$lz", label);
42480 if (MACHOPIC_ATT_STUB)
42481 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42482 else if (MACHOPIC_PURE)
42483 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42484 else
42485 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42487 fprintf (file, "%s:\n", stub);
42488 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42490 if (MACHOPIC_ATT_STUB)
42492 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42494 else if (MACHOPIC_PURE)
42496 /* PIC stub. */
42497 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42498 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42499 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42500 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42501 label, lazy_ptr_name, label);
42502 fprintf (file, "\tjmp\t*%%ecx\n");
42504 else
42505 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42507 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42508 it needs no stub-binding-helper. */
42509 if (MACHOPIC_ATT_STUB)
42510 return;
42512 fprintf (file, "%s:\n", binder_name);
42514 if (MACHOPIC_PURE)
42516 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42517 fprintf (file, "\tpushl\t%%ecx\n");
42519 else
42520 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42522 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42524 /* N.B. Keep the correspondence of these
42525 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42526 old-pic/new-pic/non-pic stubs; altering this will break
42527 compatibility with existing dylibs. */
42528 if (MACHOPIC_PURE)
42530 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42531 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42533 else
42534 /* 16-byte -mdynamic-no-pic stub. */
42535 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
42537 fprintf (file, "%s:\n", lazy_ptr_name);
42538 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42539 fprintf (file, ASM_LONG "%s\n", binder_name);
42541 #endif /* TARGET_MACHO */
42543 /* Order the registers for register allocator. */
42545 void
42546 x86_order_regs_for_local_alloc (void)
42548 int pos = 0;
42549 int i;
42551 /* First allocate the local general purpose registers. */
42552 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42553 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42554 reg_alloc_order [pos++] = i;
42556 /* Global general purpose registers. */
42557 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42558 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42559 reg_alloc_order [pos++] = i;
42561 /* x87 registers come first in case we are doing FP math
42562 using them. */
42563 if (!TARGET_SSE_MATH)
42564 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42565 reg_alloc_order [pos++] = i;
42567 /* SSE registers. */
42568 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42569 reg_alloc_order [pos++] = i;
42570 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42571 reg_alloc_order [pos++] = i;
42573 /* Extended REX SSE registers. */
42574 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42575 reg_alloc_order [pos++] = i;
42577 /* Mask register. */
42578 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42579 reg_alloc_order [pos++] = i;
42581 /* MPX bound registers. */
42582 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42583 reg_alloc_order [pos++] = i;
42585 /* x87 registers. */
42586 if (TARGET_SSE_MATH)
42587 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42588 reg_alloc_order [pos++] = i;
42590 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42591 reg_alloc_order [pos++] = i;
42593 /* Initialize the rest of the array, as we do not allocate some
42594 registers at all. */
42595 while (pos < FIRST_PSEUDO_REGISTER)
42596 reg_alloc_order [pos++] = 0;
42599 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42600 in struct attribute_spec.handler. */
42601 static tree
42602 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42603 tree args,
42604 int,
42605 bool *no_add_attrs)
42607 if (TREE_CODE (*node) != FUNCTION_TYPE
42608 && TREE_CODE (*node) != METHOD_TYPE
42609 && TREE_CODE (*node) != FIELD_DECL
42610 && TREE_CODE (*node) != TYPE_DECL)
42612 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42613 name);
42614 *no_add_attrs = true;
42615 return NULL_TREE;
42617 if (TARGET_64BIT)
42619 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42620 name);
42621 *no_add_attrs = true;
42622 return NULL_TREE;
42624 if (is_attribute_p ("callee_pop_aggregate_return", name))
42626 tree cst;
42628 cst = TREE_VALUE (args);
42629 if (TREE_CODE (cst) != INTEGER_CST)
42631 warning (OPT_Wattributes,
42632 "%qE attribute requires an integer constant argument",
42633 name);
42634 *no_add_attrs = true;
42636 else if (compare_tree_int (cst, 0) != 0
42637 && compare_tree_int (cst, 1) != 0)
42639 warning (OPT_Wattributes,
42640 "argument to %qE attribute is neither zero, nor one",
42641 name);
42642 *no_add_attrs = true;
42645 return NULL_TREE;
42648 return NULL_TREE;
42651 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
42652 struct attribute_spec.handler. */
42653 static tree
42654 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42655 bool *no_add_attrs)
42657 if (TREE_CODE (*node) != FUNCTION_TYPE
42658 && TREE_CODE (*node) != METHOD_TYPE
42659 && TREE_CODE (*node) != FIELD_DECL
42660 && TREE_CODE (*node) != TYPE_DECL)
42662 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42663 name);
42664 *no_add_attrs = true;
42665 return NULL_TREE;
42668 /* Can combine regparm with all attributes but fastcall. */
42669 if (is_attribute_p ("ms_abi", name))
42671 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42673 error ("ms_abi and sysv_abi attributes are not compatible");
42676 return NULL_TREE;
42678 else if (is_attribute_p ("sysv_abi", name))
42680 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42682 error ("ms_abi and sysv_abi attributes are not compatible");
42685 return NULL_TREE;
42688 return NULL_TREE;
42691 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42692 struct attribute_spec.handler. */
42693 static tree
42694 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
42695 bool *no_add_attrs)
42697 tree *type = NULL;
42698 if (DECL_P (*node))
42700 if (TREE_CODE (*node) == TYPE_DECL)
42701 type = &TREE_TYPE (*node);
42703 else
42704 type = node;
42706 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
42708 warning (OPT_Wattributes, "%qE attribute ignored",
42709 name);
42710 *no_add_attrs = true;
42713 else if ((is_attribute_p ("ms_struct", name)
42714 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
42715 || ((is_attribute_p ("gcc_struct", name)
42716 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
42718 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
42719 name);
42720 *no_add_attrs = true;
42723 return NULL_TREE;
42726 static tree
42727 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
42728 bool *no_add_attrs)
42730 if (TREE_CODE (*node) != FUNCTION_DECL)
42732 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42733 name);
42734 *no_add_attrs = true;
42736 return NULL_TREE;
42739 static bool
42740 ix86_ms_bitfield_layout_p (const_tree record_type)
42742 return ((TARGET_MS_BITFIELD_LAYOUT
42743 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
42744 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
42747 /* Returns an expression indicating where the this parameter is
42748 located on entry to the FUNCTION. */
42750 static rtx
42751 x86_this_parameter (tree function)
42753 tree type = TREE_TYPE (function);
42754 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
42755 int nregs;
42757 if (TARGET_64BIT)
42759 const int *parm_regs;
42761 if (ix86_function_type_abi (type) == MS_ABI)
42762 parm_regs = x86_64_ms_abi_int_parameter_registers;
42763 else
42764 parm_regs = x86_64_int_parameter_registers;
42765 return gen_rtx_REG (Pmode, parm_regs[aggr]);
42768 nregs = ix86_function_regparm (type, function);
42770 if (nregs > 0 && !stdarg_p (type))
42772 int regno;
42773 unsigned int ccvt = ix86_get_callcvt (type);
42775 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42776 regno = aggr ? DX_REG : CX_REG;
42777 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42779 regno = CX_REG;
42780 if (aggr)
42781 return gen_rtx_MEM (SImode,
42782 plus_constant (Pmode, stack_pointer_rtx, 4));
42784 else
42786 regno = AX_REG;
42787 if (aggr)
42789 regno = DX_REG;
42790 if (nregs == 1)
42791 return gen_rtx_MEM (SImode,
42792 plus_constant (Pmode,
42793 stack_pointer_rtx, 4));
42796 return gen_rtx_REG (SImode, regno);
42799 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
42800 aggr ? 8 : 4));
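/* Example (illustrative, 64-bit SysV case): x86_64_int_parameter_registers
   begins with %rdi, %rsi, so "this" is normally parm_regs[0] == %rdi;
   when the function returns an aggregate in memory, the hidden return
   pointer occupies the first register and "this" is parm_regs[1],
   i.e. %rsi.  */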
42803 /* Determine whether x86_output_mi_thunk can succeed. */
42805 static bool
42806 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
42807 const_tree function)
42809 /* 64-bit can handle anything. */
42810 if (TARGET_64BIT)
42811 return true;
42813 /* For 32-bit, everything's fine if we have one free register. */
42814 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
42815 return true;
42817 /* Need a free register for vcall_offset. */
42818 if (vcall_offset)
42819 return false;
42821 /* Need a free register for GOT references. */
42822 if (flag_pic && !targetm.binds_local_p (function))
42823 return false;
42825 /* Otherwise ok. */
42826 return true;
42829 /* Output the assembler code for a thunk function. THUNK_DECL is the
42830 declaration for the thunk function itself, FUNCTION is the decl for
42831 the target function. DELTA is an immediate constant offset to be
42832 added to THIS. If VCALL_OFFSET is nonzero, the word at
42833 *(*this + vcall_offset) should be added to THIS. */
42835 static void
42836 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
42837 HOST_WIDE_INT vcall_offset, tree function)
42839 rtx this_param = x86_this_parameter (function);
42840 rtx this_reg, tmp, fnaddr;
42841 unsigned int tmp_regno;
42842 rtx_insn *insn;
42844 if (TARGET_64BIT)
42845 tmp_regno = R10_REG;
42846 else
42848 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
42849 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42850 tmp_regno = AX_REG;
42851 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42852 tmp_regno = DX_REG;
42853 else
42854 tmp_regno = CX_REG;
42857 emit_note (NOTE_INSN_PROLOGUE_END);
42859 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
42860 pull it in now and let DELTA benefit. */
42861 if (REG_P (this_param))
42862 this_reg = this_param;
42863 else if (vcall_offset)
42865 /* Put the this parameter into %eax. */
42866 this_reg = gen_rtx_REG (Pmode, AX_REG);
42867 emit_move_insn (this_reg, this_param);
42869 else
42870 this_reg = NULL_RTX;
42872 /* Adjust the this parameter by a fixed constant. */
42873 if (delta)
42875 rtx delta_rtx = GEN_INT (delta);
42876 rtx delta_dst = this_reg ? this_reg : this_param;
42878 if (TARGET_64BIT)
42880 if (!x86_64_general_operand (delta_rtx, Pmode))
42882 tmp = gen_rtx_REG (Pmode, tmp_regno);
42883 emit_move_insn (tmp, delta_rtx);
42884 delta_rtx = tmp;
42888 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
42891 /* Adjust the this parameter by a value stored in the vtable. */
42892 if (vcall_offset)
42894 rtx vcall_addr, vcall_mem, this_mem;
42896 tmp = gen_rtx_REG (Pmode, tmp_regno);
42898 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
42899 if (Pmode != ptr_mode)
42900 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
42901 emit_move_insn (tmp, this_mem);
42903 /* Adjust the this parameter. */
42904 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
42905 if (TARGET_64BIT
42906 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
42908 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
42909 emit_move_insn (tmp2, GEN_INT (vcall_offset));
42910 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
42913 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
42914 if (Pmode != ptr_mode)
42915 emit_insn (gen_addsi_1_zext (this_reg,
42916 gen_rtx_REG (ptr_mode,
42917 REGNO (this_reg)),
42918 vcall_mem));
42919 else
42920 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
42923 /* If necessary, drop THIS back to its stack slot. */
42924 if (this_reg && this_reg != this_param)
42925 emit_move_insn (this_param, this_reg);
42927 fnaddr = XEXP (DECL_RTL (function), 0);
42928 if (TARGET_64BIT)
42930 if (!flag_pic || targetm.binds_local_p (function)
42931 || TARGET_PECOFF)
42932 ;
42933 else
42935 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
42936 tmp = gen_rtx_CONST (Pmode, tmp);
42937 fnaddr = gen_const_mem (Pmode, tmp);
42940 else
42942 if (!flag_pic || targetm.binds_local_p (function))
42943 ;
42944 #if TARGET_MACHO
42945 else if (TARGET_MACHO)
42947 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
42948 fnaddr = XEXP (fnaddr, 0);
42950 #endif /* TARGET_MACHO */
42951 else
42953 tmp = gen_rtx_REG (Pmode, CX_REG);
42954 output_set_got (tmp, NULL_RTX);
42956 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
42957 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
42958 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
42959 fnaddr = gen_const_mem (Pmode, fnaddr);
42963 /* Our sibling call patterns do not allow memories, because we have no
42964 predicate that can distinguish between frame and non-frame memory.
42965 For our purposes here, we can get away with (ab)using a jump pattern,
42966 because we're going to do no optimization. */
42967 if (MEM_P (fnaddr))
42969 if (sibcall_insn_operand (fnaddr, word_mode))
42971 fnaddr = XEXP (DECL_RTL (function), 0);
42972 tmp = gen_rtx_MEM (QImode, fnaddr);
42973 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42974 tmp = emit_call_insn (tmp);
42975 SIBLING_CALL_P (tmp) = 1;
42977 else
42978 emit_jump_insn (gen_indirect_jump (fnaddr));
42980 else
42982 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
42984 // CM_LARGE_PIC always uses a pseudo PIC register, which is
42985 // uninitialized. Since FUNCTION is local and calling it
42986 // doesn't go through the PLT, we use scratch register %r11 as
42987 // the PIC register and initialize it here.
42988 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
42989 ix86_init_large_pic_reg (tmp_regno);
42990 fnaddr = legitimize_pic_address (fnaddr,
42991 gen_rtx_REG (Pmode, tmp_regno));
42994 if (!sibcall_insn_operand (fnaddr, word_mode))
42996 tmp = gen_rtx_REG (word_mode, tmp_regno);
42997 if (GET_MODE (fnaddr) != word_mode)
42998 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
42999 emit_move_insn (tmp, fnaddr);
43000 fnaddr = tmp;
43003 tmp = gen_rtx_MEM (QImode, fnaddr);
43004 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43005 tmp = emit_call_insn (tmp);
43006 SIBLING_CALL_P (tmp) = 1;
43008 emit_barrier ();
43010 /* Emit just enough of rest_of_compilation to get the insns emitted.
43011 Note that use_thunk calls assemble_start_function et al. */
43012 insn = get_insns ();
43013 shorten_branches (insn);
43014 final_start_function (insn, file, 1);
43015 final (insn, file, 1);
43016 final_end_function ();
43019 static void
43020 x86_file_start (void)
43022 default_file_start ();
43023 if (TARGET_16BIT)
43024 fputs ("\t.code16gcc\n", asm_out_file);
43025 #if TARGET_MACHO
43026 darwin_file_start ();
43027 #endif
43028 if (X86_FILE_START_VERSION_DIRECTIVE)
43029 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
43030 if (X86_FILE_START_FLTUSED)
43031 fputs ("\t.global\t__fltused\n", asm_out_file);
43032 if (ix86_asm_dialect == ASM_INTEL)
43033 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
43036 int
43037 x86_field_alignment (tree field, int computed)
43039 machine_mode mode;
43040 tree type = TREE_TYPE (field);
43042 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
43043 return computed;
43044 mode = TYPE_MODE (strip_array_types (type));
43045 if (mode == DFmode || mode == DCmode
43046 || GET_MODE_CLASS (mode) == MODE_INT
43047 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
43048 return MIN (32, computed);
43049 return computed;
43052 /* Print call to TARGET to FILE. */
43054 static void
43055 x86_print_call_or_nop (FILE *file, const char *target)
43057 if (flag_nop_mcount)
43058 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
43059 else
43060 fprintf (file, "1:\tcall\t%s\n", target);
43063 /* Output assembler code to FILE to increment profiler label # LABELNO
43064 for profiling a function entry. */
43065 void
43066 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
43068 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
43069 : MCOUNT_NAME);
43070 if (TARGET_64BIT)
43072 #ifndef NO_PROFILE_COUNTERS
43073 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
43074 #endif
43076 if (!TARGET_PECOFF && flag_pic)
43077 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
43078 else
43079 x86_print_call_or_nop (file, mcount_name);
43081 else if (flag_pic)
43083 #ifndef NO_PROFILE_COUNTERS
43084 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43085 LPREFIX, labelno);
43086 #endif
43087 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43089 else
43091 #ifndef NO_PROFILE_COUNTERS
43092 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43093 LPREFIX, labelno);
43094 #endif
43095 x86_print_call_or_nop (file, mcount_name);
43098 if (flag_record_mcount)
43100 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43101 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43102 fprintf (file, "\t.previous\n");
43106 /* We don't have exact information about the insn sizes, but we may assume
43107 quite safely that we are informed about all 1 byte insns and memory
43108 address sizes. This is enough to eliminate unnecessary padding in
43109 99% of cases. */
43111 static int
43112 min_insn_size (rtx_insn *insn)
43114 int l = 0, len;
43116 if (!INSN_P (insn) || !active_insn_p (insn))
43117 return 0;
43119 /* Discard alignments we've emitted and jump instructions. */
43120 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43121 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43122 return 0;
43124 /* Important case - calls are always 5 bytes.
43125 It is common to have many calls in a row. */
43126 if (CALL_P (insn)
43127 && symbolic_reference_mentioned_p (PATTERN (insn))
43128 && !SIBLING_CALL_P (insn))
43129 return 5;
43130 len = get_attr_length (insn);
43131 if (len <= 1)
43132 return 1;
43134 /* For normal instructions we rely on get_attr_length being exact,
43135 with a few exceptions. */
43136 if (!JUMP_P (insn))
43138 enum attr_type type = get_attr_type (insn);
43140 switch (type)
43142 case TYPE_MULTI:
43143 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43144 || asm_noperands (PATTERN (insn)) >= 0)
43145 return 0;
43146 break;
43147 case TYPE_OTHER:
43148 case TYPE_FCMP:
43149 break;
43150 default:
43151 /* Otherwise trust get_attr_length. */
43152 return len;
43155 l = get_attr_length_address (insn);
43156 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43157 l = 4;
43159 if (l)
43160 return 1+l;
43161 else
43162 return 2;
43165 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43167 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
43168 window. */
43170 static void
43171 ix86_avoid_jump_mispredicts (void)
43173 rtx_insn *insn, *start = get_insns ();
43174 int nbytes = 0, njumps = 0;
43175 bool isjump = false;
43177 /* Look for all minimal intervals of instructions containing 4 jumps.
43178 The intervals are bounded by START and INSN. NBYTES is the total
43179 size of instructions in the interval including INSN and not including
43180 START. When NBYTES is smaller than 16 bytes, it is possible
43181 that the end of START and INSN end up in the same 16-byte page.
43183 The smallest offset in the page at which INSN can start is the case
43184 where START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
43185 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
43187 Don't consider an asm goto as a jump; while it can contain a jump, it
43188 doesn't have to, since control transfer to its label(s) can be performed
43189 through other means, and we also estimate the minimum length of all asm stmts as 0. */
43190 for (insn = start; insn; insn = NEXT_INSN (insn))
43192 int min_size;
43194 if (LABEL_P (insn))
43196 int align = label_to_alignment (insn);
43197 int max_skip = label_to_max_skip (insn);
43199 if (max_skip > 15)
43200 max_skip = 15;
43201 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43202 already in the current 16 byte page, because otherwise
43203 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43204 bytes to reach 16 byte boundary. */
43205 if (align <= 0
43206 || (align <= 3 && max_skip != (1 << align) - 1))
43207 max_skip = 0;
43208 if (dump_file)
43209 fprintf (dump_file, "Label %i with max_skip %i\n",
43210 INSN_UID (insn), max_skip);
43211 if (max_skip)
43213 while (nbytes + max_skip >= 16)
43215 start = NEXT_INSN (start);
43216 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43217 || CALL_P (start))
43218 njumps--, isjump = true;
43219 else
43220 isjump = false;
43221 nbytes -= min_insn_size (start);
43224 continue;
43227 min_size = min_insn_size (insn);
43228 nbytes += min_size;
43229 if (dump_file)
43230 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43231 INSN_UID (insn), min_size);
43232 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43233 || CALL_P (insn))
43234 njumps++;
43235 else
43236 continue;
43238 while (njumps > 3)
43240 start = NEXT_INSN (start);
43241 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43242 || CALL_P (start))
43243 njumps--, isjump = true;
43244 else
43245 isjump = false;
43246 nbytes -= min_insn_size (start);
43248 gcc_assert (njumps >= 0);
43249 if (dump_file)
43250 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43251 INSN_UID (start), INSN_UID (insn), nbytes);
43253 if (njumps == 3 && isjump && nbytes < 16)
43255 int padsize = 15 - nbytes + min_insn_size (insn);
43257 if (dump_file)
43258 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43259 INSN_UID (insn), padsize);
43260 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
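/* Worked example (illustrative): with nbytes == 12 for the interval and
   min_insn_size (insn) == 2, padsize == 15 - 12 + 2 == 5, i.e. just
   enough padding that INSN can no longer end up in the same 16-byte
   window as START.  */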
43264 #endif
43266 /* AMD Athlon works faster
43267 when RET is not the destination of a conditional jump or directly preceded
43268 by another jump instruction. We avoid the penalty by inserting a NOP just
43269 before the RET instruction in such cases. */
43270 static void
43271 ix86_pad_returns (void)
43273 edge e;
43274 edge_iterator ei;
43276 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43278 basic_block bb = e->src;
43279 rtx_insn *ret = BB_END (bb);
43280 rtx_insn *prev;
43281 bool replace = false;
43283 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43284 || optimize_bb_for_size_p (bb))
43285 continue;
43286 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43287 if (active_insn_p (prev) || LABEL_P (prev))
43288 break;
43289 if (prev && LABEL_P (prev))
43291 edge e;
43292 edge_iterator ei;
43294 FOR_EACH_EDGE (e, ei, bb->preds)
43295 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43296 && !(e->flags & EDGE_FALLTHRU))
43298 replace = true;
43299 break;
43302 if (!replace)
43304 prev = prev_active_insn (ret);
43305 if (prev
43306 && ((JUMP_P (prev) && any_condjump_p (prev))
43307 || CALL_P (prev)))
43308 replace = true;
43309 /* Empty functions get a branch mispredict even when
43310 the jump destination is not visible to us. */
43311 if (!prev && !optimize_function_for_size_p (cfun))
43312 replace = true;
43314 if (replace)
43316 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43317 delete_insn (ret);
43322 /* Count the minimum number of instructions in BB. Return 4 if the
43323 number of instructions >= 4. */
43325 static int
43326 ix86_count_insn_bb (basic_block bb)
43328 rtx_insn *insn;
43329 int insn_count = 0;
43331 /* Count number of instructions in this block. Return 4 if the number
43332 of instructions >= 4. */
43333 FOR_BB_INSNS (bb, insn)
43336 /* Only happens in exit blocks. */
43336 if (JUMP_P (insn)
43337 && ANY_RETURN_P (PATTERN (insn)))
43338 break;
43340 if (NONDEBUG_INSN_P (insn)
43341 && GET_CODE (PATTERN (insn)) != USE
43342 && GET_CODE (PATTERN (insn)) != CLOBBER)
43344 insn_count++;
43345 if (insn_count >= 4)
43346 return insn_count;
43350 return insn_count;
43354 /* Count the minimum number of instructions in a code path in BB.
43355 Return 4 if the number of instructions >= 4. */
43357 static int
43358 ix86_count_insn (basic_block bb)
43360 edge e;
43361 edge_iterator ei;
43362 int min_prev_count;
43364 /* Only bother counting instructions along paths with no
43365 more than 2 basic blocks between entry and exit. Given
43366 that BB has an edge to exit, determine if a predecessor
43367 of BB has an edge from entry. If so, compute the number
43368 of instructions in the predecessor block. If there
43369 happen to be multiple such blocks, compute the minimum. */
43370 min_prev_count = 4;
43371 FOR_EACH_EDGE (e, ei, bb->preds)
43373 edge prev_e;
43374 edge_iterator prev_ei;
43376 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43378 min_prev_count = 0;
43379 break;
43381 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43383 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43385 int count = ix86_count_insn_bb (e->src);
43386 if (count < min_prev_count)
43387 min_prev_count = count;
43388 break;
43393 if (min_prev_count < 4)
43394 min_prev_count += ix86_count_insn_bb (bb);
43396 return min_prev_count;
43399 /* Pad short function to 4 instructions. */
43401 static void
43402 ix86_pad_short_function (void)
43404 edge e;
43405 edge_iterator ei;
43407 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43409 rtx_insn *ret = BB_END (e->src);
43410 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43412 int insn_count = ix86_count_insn (e->src);
43414 /* Pad short function. */
43415 if (insn_count < 4)
43417 rtx_insn *insn = ret;
43419 /* Find epilogue. */
43420 while (insn
43421 && (!NOTE_P (insn)
43422 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43423 insn = PREV_INSN (insn);
43425 if (!insn)
43426 insn = ret;
43428 /* Two NOPs count as one instruction. */
43429 insn_count = 2 * (4 - insn_count);
43430 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
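/* Example (illustrative): a path containing only one real instruction
   gets 2 * (4 - 1) == 6 NOPs emitted before the epilogue, since two
   NOPs are counted as one instruction here.  */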
43436 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43437 the epilogue, the Windows system unwinder will apply epilogue logic and
43438 produce incorrect offsets. This can be avoided by adding a nop between
43439 the last insn that can throw and the first insn of the epilogue. */
43441 static void
43442 ix86_seh_fixup_eh_fallthru (void)
43444 edge e;
43445 edge_iterator ei;
43447 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43449 rtx_insn *insn, *next;
43451 /* Find the beginning of the epilogue. */
43452 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43453 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43454 break;
43455 if (insn == NULL)
43456 continue;
43458 /* We only care about preceding insns that can throw. */
43459 insn = prev_active_insn (insn);
43460 if (insn == NULL || !can_throw_internal (insn))
43461 continue;
43463 /* Do not separate calls from their debug information. */
43464 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43465 if (NOTE_P (next)
43466 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43467 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43468 insn = next;
43469 else
43470 break;
43472 emit_insn_after (gen_nops (const1_rtx), insn);
43476 /* Implement machine specific optimizations. We implement padding of returns
43477 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
43478 static void
43479 ix86_reorg (void)
43481 /* We are freeing block_for_insn in the toplev to keep compatibility
43482 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43483 compute_bb_for_insn ();
43485 if (TARGET_SEH && current_function_has_exception_handlers ())
43486 ix86_seh_fixup_eh_fallthru ();
43488 if (optimize && optimize_function_for_speed_p (cfun))
43490 if (TARGET_PAD_SHORT_FUNCTION)
43491 ix86_pad_short_function ();
43492 else if (TARGET_PAD_RETURNS)
43493 ix86_pad_returns ();
43494 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43495 if (TARGET_FOUR_JUMP_LIMIT)
43496 ix86_avoid_jump_mispredicts ();
43497 #endif
43501 /* Return nonzero when a QImode register that must be represented via a REX
43502 prefix is used. */
43503 bool
43504 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43506 int i;
43507 extract_insn_cached (insn);
43508 for (i = 0; i < recog_data.n_operands; i++)
43509 if (GENERAL_REG_P (recog_data.operand[i])
43510 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43511 return true;
43512 return false;
43515 /* Return true when INSN mentions a register that must be encoded using a
43516 REX prefix. */
43517 bool
43518 x86_extended_reg_mentioned_p (rtx insn)
43520 subrtx_iterator::array_type array;
43521 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43523 const_rtx x = *iter;
43524 if (REG_P (x)
43525 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43526 return true;
43528 return false;
43531 /* If profitable, negate (without causing overflow) integer constant
43532 of mode MODE at location LOC. Return true in this case. */
43533 bool
43534 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43536 HOST_WIDE_INT val;
43538 if (!CONST_INT_P (*loc))
43539 return false;
43541 switch (mode)
43543 case DImode:
43544 /* DImode x86_64 constants must fit in 32 bits. */
43545 gcc_assert (x86_64_immediate_operand (*loc, mode));
43547 mode = SImode;
43548 break;
43550 case SImode:
43551 case HImode:
43552 case QImode:
43553 break;
43555 default:
43556 gcc_unreachable ();
43559 /* Avoid overflows. */
43560 if (mode_signbit_p (mode, *loc))
43561 return false;
43563 val = INTVAL (*loc);
43565 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
43566 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
43567 if ((val < 0 && val != -128)
43568 || val == 128)
43570 *loc = GEN_INT (-val);
43571 return true;
43574 return false;
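/* Illustrative examples: -4 is negated so the caller can emit
   "subl $4, %eax" instead of "addl $-4, %eax"; +128 is negated to -128
   because -128 fits a sign-extended 8-bit immediate while +128 does
   not; -128 itself is deliberately left alone for the same reason.  */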
43577 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43578 optabs would emit if we didn't have TFmode patterns. */
43580 void
43581 x86_emit_floatuns (rtx operands[2])
43583 rtx_code_label *neglab, *donelab;
43584 rtx i0, i1, f0, in, out;
43585 machine_mode mode, inmode;
43587 inmode = GET_MODE (operands[1]);
43588 gcc_assert (inmode == SImode || inmode == DImode);
43590 out = operands[0];
43591 in = force_reg (inmode, operands[1]);
43592 mode = GET_MODE (out);
43593 neglab = gen_label_rtx ();
43594 donelab = gen_label_rtx ();
43595 f0 = gen_reg_rtx (mode);
43597 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43599 expand_float (out, in, 0);
43601 emit_jump_insn (gen_jump (donelab));
43602 emit_barrier ();
43604 emit_label (neglab);
43606 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43607 1, OPTAB_DIRECT);
43608 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43609 1, OPTAB_DIRECT);
43610 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43612 expand_float (f0, i0, 0);
43614 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
43616 emit_label (donelab);
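/* Sketch of the two paths above (illustrative): a value with the sign
   bit clear takes the plain signed conversion.  A value with the sign
   bit set, e.g. the DImode value 1 << 63, is halved with its low bit
   OR-ed back in as a sticky bit for correct rounding (i0 == 1 << 62),
   converted, and then doubled, giving the exact result 2^63.  */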
43619 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43620 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43621 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43622 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43624 /* Get a vector mode of the same size as the original but with elements
43625 twice as wide. This is only guaranteed to apply to integral vectors. */
43627 static inline machine_mode
43628 get_mode_wider_vector (machine_mode o)
43630 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43631 machine_mode n = GET_MODE_WIDER_MODE (o);
43632 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43633 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43634 return n;
43637 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43638 fill target with val via vec_duplicate. */
43640 static bool
43641 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43643 bool ok;
43644 rtx_insn *insn;
43645 rtx dup;
43647 /* First attempt to recognize VAL as-is. */
43648 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43649 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
43650 if (recog_memoized (insn) < 0)
43652 rtx_insn *seq;
43653 /* If that fails, force VAL into a register. */
43655 start_sequence ();
43656 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43657 seq = get_insns ();
43658 end_sequence ();
43659 if (seq)
43660 emit_insn_before (seq, insn);
43662 ok = recog_memoized (insn) >= 0;
43663 gcc_assert (ok);
43665 return true;
43668 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43669 with all elements equal to VAR. Return true if successful. */
43671 static bool
43672 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43673 rtx target, rtx val)
43675 bool ok;
43677 switch (mode)
43679 case V2SImode:
43680 case V2SFmode:
43681 if (!mmx_ok)
43682 return false;
43683 /* FALLTHRU */
43685 case V4DFmode:
43686 case V4DImode:
43687 case V8SFmode:
43688 case V8SImode:
43689 case V2DFmode:
43690 case V2DImode:
43691 case V4SFmode:
43692 case V4SImode:
43693 case V16SImode:
43694 case V8DImode:
43695 case V16SFmode:
43696 case V8DFmode:
43697 return ix86_vector_duplicate_value (mode, target, val);
43699 case V4HImode:
43700 if (!mmx_ok)
43701 return false;
43702 if (TARGET_SSE || TARGET_3DNOW_A)
43704 rtx x;
43706 val = gen_lowpart (SImode, val);
43707 x = gen_rtx_TRUNCATE (HImode, val);
43708 x = gen_rtx_VEC_DUPLICATE (mode, x);
43709 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43710 return true;
43712 goto widen;
43714 case V8QImode:
43715 if (!mmx_ok)
43716 return false;
43717 goto widen;
43719 case V8HImode:
43720 if (TARGET_AVX2)
43721 return ix86_vector_duplicate_value (mode, target, val);
43723 if (TARGET_SSE2)
43725 struct expand_vec_perm_d dperm;
43726 rtx tmp1, tmp2;
43728 permute:
43729 memset (&dperm, 0, sizeof (dperm));
43730 dperm.target = target;
43731 dperm.vmode = mode;
43732 dperm.nelt = GET_MODE_NUNITS (mode);
43733 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
43734 dperm.one_operand_p = true;
43736 /* Extend to SImode using a paradoxical SUBREG. */
43737 tmp1 = gen_reg_rtx (SImode);
43738 emit_move_insn (tmp1, gen_lowpart (SImode, val));
43740 /* Insert the SImode value as low element of a V4SImode vector. */
43741 tmp2 = gen_reg_rtx (V4SImode);
43742 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
43743 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
43745 ok = (expand_vec_perm_1 (&dperm)
43746 || expand_vec_perm_broadcast_1 (&dperm));
43747 gcc_assert (ok);
43748 return ok;
43750 goto widen;
43752 case V16QImode:
43753 if (TARGET_AVX2)
43754 return ix86_vector_duplicate_value (mode, target, val);
43756 if (TARGET_SSE2)
43757 goto permute;
43758 goto widen;
43760 widen:
43761 /* Replicate the value once into the next wider mode and recurse. */
43763 machine_mode smode, wsmode, wvmode;
43764 rtx x;
43766 smode = GET_MODE_INNER (mode);
43767 wvmode = get_mode_wider_vector (mode);
43768 wsmode = GET_MODE_INNER (wvmode);
43770 val = convert_modes (wsmode, smode, val, true);
43771 x = expand_simple_binop (wsmode, ASHIFT, val,
43772 GEN_INT (GET_MODE_BITSIZE (smode)),
43773 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43774 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
43776 x = gen_reg_rtx (wvmode);
43777 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
43778 gcc_assert (ok);
43779 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
43780 return ok;
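/* Sketch of the widening step above (illustrative): duplicating the
   QImode value 0xab first forms the HImode value
   0xab | (0xab << 8) == 0xabab and then recurses with a vector of half
   as many, twice-as-wide elements, e.g. V16QImode -> V8HImode.  */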
43783 case V16HImode:
43784 case V32QImode:
43785 if (TARGET_AVX2)
43786 return ix86_vector_duplicate_value (mode, target, val);
43787 else
43789 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
43790 rtx x = gen_reg_rtx (hvmode);
43792 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43793 gcc_assert (ok);
43795 x = gen_rtx_VEC_CONCAT (mode, x, x);
43796 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43798 return true;
43800 case V64QImode:
43801 case V32HImode:
43802 if (TARGET_AVX512BW)
43803 return ix86_vector_duplicate_value (mode, target, val);
43804 else
43806 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
43807 rtx x = gen_reg_rtx (hvmode);
43809 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43810 gcc_assert (ok);
43812 x = gen_rtx_VEC_CONCAT (mode, x, x);
43813 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43815 return true;
43817 default:
43818 return false;
43822 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43823 whose ONE_VAR element is VAR, and other elements are zero. Return true
43824 if successful. */
43826 static bool
43827 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
43828 rtx target, rtx var, int one_var)
43830 machine_mode vsimode;
43831 rtx new_target;
43832 rtx x, tmp;
43833 bool use_vector_set = false;
43835 switch (mode)
43837 case V2DImode:
43838 /* For SSE4.1, we normally use vector set. But if the second
43839 element is zero and inter-unit moves are OK, we use movq
43840 instead. */
43841 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
43842 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
43843 && one_var == 0));
43844 break;
43845 case V16QImode:
43846 case V4SImode:
43847 case V4SFmode:
43848 use_vector_set = TARGET_SSE4_1;
43849 break;
43850 case V8HImode:
43851 use_vector_set = TARGET_SSE2;
43852 break;
43853 case V4HImode:
43854 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
43855 break;
43856 case V32QImode:
43857 case V16HImode:
43858 case V8SImode:
43859 case V8SFmode:
43860 case V4DFmode:
43861 use_vector_set = TARGET_AVX;
43862 break;
43863 case V4DImode:
43864 /* Use ix86_expand_vector_set in 64bit mode only. */
43865 use_vector_set = TARGET_AVX && TARGET_64BIT;
43866 break;
43867 default:
43868 break;
43871 if (use_vector_set)
43873 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
43874 var = force_reg (GET_MODE_INNER (mode), var);
43875 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43876 return true;
43879 switch (mode)
43881 case V2SFmode:
43882 case V2SImode:
43883 if (!mmx_ok)
43884 return false;
43885 /* FALLTHRU */
43887 case V2DFmode:
43888 case V2DImode:
43889 if (one_var != 0)
43890 return false;
43891 var = force_reg (GET_MODE_INNER (mode), var);
43892 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
43893 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43894 return true;
43896 case V4SFmode:
43897 case V4SImode:
43898 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
43899 new_target = gen_reg_rtx (mode);
43900 else
43901 new_target = target;
43902 var = force_reg (GET_MODE_INNER (mode), var);
43903 x = gen_rtx_VEC_DUPLICATE (mode, var);
43904 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
43905 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
43906 if (one_var != 0)
43908 /* We need to shuffle the value to the correct position, so
43909 create a new pseudo to store the intermediate result. */
43911 /* With SSE2, we can use the integer shuffle insns. */
43912 if (mode != V4SFmode && TARGET_SSE2)
43914 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
43915 const1_rtx,
43916 GEN_INT (one_var == 1 ? 0 : 1),
43917 GEN_INT (one_var == 2 ? 0 : 1),
43918 GEN_INT (one_var == 3 ? 0 : 1)));
43919 if (target != new_target)
43920 emit_move_insn (target, new_target);
43921 return true;
43924 /* Otherwise convert the intermediate result to V4SFmode and
43925 use the SSE1 shuffle instructions. */
43926 if (mode != V4SFmode)
43928 tmp = gen_reg_rtx (V4SFmode);
43929 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
43931 else
43932 tmp = new_target;
43934 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
43935 const1_rtx,
43936 GEN_INT (one_var == 1 ? 0 : 1),
43937 GEN_INT (one_var == 2 ? 0+4 : 1+4),
43938 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
43940 if (mode != V4SFmode)
43941 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
43942 else if (tmp != target)
43943 emit_move_insn (target, tmp);
43945 else if (target != new_target)
43946 emit_move_insn (target, new_target);
43947 return true;
43949 case V8HImode:
43950 case V16QImode:
43951 vsimode = V4SImode;
43952 goto widen;
43953 case V4HImode:
43954 case V8QImode:
43955 if (!mmx_ok)
43956 return false;
43957 vsimode = V2SImode;
43958 goto widen;
43959 widen:
43960 if (one_var != 0)
43961 return false;
43963 /* Zero extend the variable element to SImode and recurse. */
43964 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
43966 x = gen_reg_rtx (vsimode);
43967 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
43968 var, one_var))
43969 gcc_unreachable ();
43971 emit_move_insn (target, gen_lowpart (mode, x));
43972 return true;
43974 default:
43975 return false;
43979 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43980 consisting of the values in VALS. It is known that all elements
43981 except ONE_VAR are constants. Return true if successful. */
43983 static bool
43984 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
43985 rtx target, rtx vals, int one_var)
43987 rtx var = XVECEXP (vals, 0, one_var);
43988 machine_mode wmode;
43989 rtx const_vec, x;
43991 const_vec = copy_rtx (vals);
43992 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
43993 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
43995 switch (mode)
43997 case V2DFmode:
43998 case V2DImode:
43999 case V2SFmode:
44000 case V2SImode:
44001 /* For the two element vectors, it's just as easy to use
44002 the general case. */
44003 return false;
44005 case V4DImode:
44006 /* Use ix86_expand_vector_set in 64bit mode only. */
44007 if (!TARGET_64BIT)
44008 return false;
44009 case V4DFmode:
44010 case V8SFmode:
44011 case V8SImode:
44012 case V16HImode:
44013 case V32QImode:
44014 case V4SFmode:
44015 case V4SImode:
44016 case V8HImode:
44017 case V4HImode:
44018 break;
44020 case V16QImode:
44021 if (TARGET_SSE4_1)
44022 break;
44023 wmode = V8HImode;
44024 goto widen;
44025 case V8QImode:
44026 wmode = V4HImode;
44027 goto widen;
44028 widen:
44029 /* There's no way to set one QImode entry easily. Combine
44030 the variable value with its adjacent constant value, and
44031 promote to an HImode set. */
44032 x = XVECEXP (vals, 0, one_var ^ 1);
44033 if (one_var & 1)
44035 var = convert_modes (HImode, QImode, var, true);
44036 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
44037 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44038 x = GEN_INT (INTVAL (x) & 0xff);
44040 else
44042 var = convert_modes (HImode, QImode, var, true);
44043 x = gen_int_mode (INTVAL (x) << 8, HImode);
44045 if (x != const0_rtx)
44046 var = expand_simple_binop (HImode, IOR, var, x, var,
44047 1, OPTAB_LIB_WIDEN);
44049 x = gen_reg_rtx (wmode);
44050 emit_move_insn (x, gen_lowpart (wmode, const_vec));
44051 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
44053 emit_move_insn (target, gen_lowpart (mode, x));
44054 return true;
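/* Sketch of the QImode trick above (illustrative): to set element 2 of
   a V8QImode vector, the variable value and the constant from element 3
   are packed into one HImode value ((elt3 << 8) | var, element 2 being
   the low half), and a single V4HImode element set is done on
   element 1 == 2 >> 1.  */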
44056 default:
44057 return false;
44060 emit_move_insn (target, const_vec);
44061 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44062 return true;
44065 /* A subroutine of ix86_expand_vector_init_general. Use vector
44066 concatenate to handle the most general case: all values variable,
44067 and none identical. */
44069 static void
44070 ix86_expand_vector_init_concat (machine_mode mode,
44071 rtx target, rtx *ops, int n)
44073 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
44074 rtx first[16], second[8], third[4];
44075 rtvec v;
44076 int i, j;
44078 switch (n)
44080 case 2:
44081 switch (mode)
44083 case V16SImode:
44084 cmode = V8SImode;
44085 break;
44086 case V16SFmode:
44087 cmode = V8SFmode;
44088 break;
44089 case V8DImode:
44090 cmode = V4DImode;
44091 break;
44092 case V8DFmode:
44093 cmode = V4DFmode;
44094 break;
44095 case V8SImode:
44096 cmode = V4SImode;
44097 break;
44098 case V8SFmode:
44099 cmode = V4SFmode;
44100 break;
44101 case V4DImode:
44102 cmode = V2DImode;
44103 break;
44104 case V4DFmode:
44105 cmode = V2DFmode;
44106 break;
44107 case V4SImode:
44108 cmode = V2SImode;
44109 break;
44110 case V4SFmode:
44111 cmode = V2SFmode;
44112 break;
44113 case V2DImode:
44114 cmode = DImode;
44115 break;
44116 case V2SImode:
44117 cmode = SImode;
44118 break;
44119 case V2DFmode:
44120 cmode = DFmode;
44121 break;
44122 case V2SFmode:
44123 cmode = SFmode;
44124 break;
44125 default:
44126 gcc_unreachable ();
44129 if (!register_operand (ops[1], cmode))
44130 ops[1] = force_reg (cmode, ops[1]);
44131 if (!register_operand (ops[0], cmode))
44132 ops[0] = force_reg (cmode, ops[0]);
44133 emit_insn (gen_rtx_SET (VOIDmode, target,
44134 gen_rtx_VEC_CONCAT (mode, ops[0],
44135 ops[1])));
44136 break;
44138 case 4:
44139 switch (mode)
44141 case V4DImode:
44142 cmode = V2DImode;
44143 break;
44144 case V4DFmode:
44145 cmode = V2DFmode;
44146 break;
44147 case V4SImode:
44148 cmode = V2SImode;
44149 break;
44150 case V4SFmode:
44151 cmode = V2SFmode;
44152 break;
44153 default:
44154 gcc_unreachable ();
44156 goto half;
44158 case 8:
44159 switch (mode)
44161 case V8DImode:
44162 cmode = V2DImode;
44163 hmode = V4DImode;
44164 break;
44165 case V8DFmode:
44166 cmode = V2DFmode;
44167 hmode = V4DFmode;
44168 break;
44169 case V8SImode:
44170 cmode = V2SImode;
44171 hmode = V4SImode;
44172 break;
44173 case V8SFmode:
44174 cmode = V2SFmode;
44175 hmode = V4SFmode;
44176 break;
44177 default:
44178 gcc_unreachable ();
44180 goto half;
44182 case 16:
44183 switch (mode)
44185 case V16SImode:
44186 cmode = V2SImode;
44187 hmode = V4SImode;
44188 gmode = V8SImode;
44189 break;
44190 case V16SFmode:
44191 cmode = V2SFmode;
44192 hmode = V4SFmode;
44193 gmode = V8SFmode;
44194 break;
44195 default:
44196 gcc_unreachable ();
44198 goto half;
44200 half:
44201 /* FIXME: We process inputs backward to help RA. PR 36222. */
44202 i = n - 1;
44203 j = (n >> 1) - 1;
44204 for (; i > 0; i -= 2, j--)
44206 first[j] = gen_reg_rtx (cmode);
44207 v = gen_rtvec (2, ops[i - 1], ops[i]);
44208 ix86_expand_vector_init (false, first[j],
44209 gen_rtx_PARALLEL (cmode, v));
44212 n >>= 1;
44213 if (n > 4)
44215 gcc_assert (hmode != VOIDmode);
44216 gcc_assert (gmode != VOIDmode);
44217 for (i = j = 0; i < n; i += 2, j++)
44219 second[j] = gen_reg_rtx (hmode);
44220 ix86_expand_vector_init_concat (hmode, second [j],
44221 &first [i], 2);
44223 n >>= 1;
44224 for (i = j = 0; i < n; i += 2, j++)
44226 third[j] = gen_reg_rtx (gmode);
44227 ix86_expand_vector_init_concat (gmode, third[j],
44228 &second[i], 2);
44230 n >>= 1;
44231 ix86_expand_vector_init_concat (mode, target, third, n);
44233 else if (n > 2)
44235 gcc_assert (hmode != VOIDmode);
44236 for (i = j = 0; i < n; i += 2, j++)
44238 second[j] = gen_reg_rtx (hmode);
44239 ix86_expand_vector_init_concat (hmode, second [j],
44240 &first [i], 2);
44242 n >>= 1;
44243 ix86_expand_vector_init_concat (mode, target, second, n);
44245 else
44246 ix86_expand_vector_init_concat (mode, target, first, n);
44247 break;
44249 default:
44250 gcc_unreachable ();
44254 /* A subroutine of ix86_expand_vector_init_general. Use vector
44255 interleave to handle the most general case: all values variable,
44256 and none identical. */
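/* Elements are packed in pairs into wider lanes first, and the partial
   vectors are then merged by interleaving their low halves at
   successively wider element sizes, e.g. V8HImode -> V4SImode ->
   V2DImode when building a V16QImode vector.  */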
44258 static void
44259 ix86_expand_vector_init_interleave (machine_mode mode,
44260 rtx target, rtx *ops, int n)
44262 machine_mode first_imode, second_imode, third_imode, inner_mode;
44263 int i, j;
44264 rtx op0, op1;
44265 rtx (*gen_load_even) (rtx, rtx, rtx);
44266 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44267 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44269 switch (mode)
44271 case V8HImode:
44272 gen_load_even = gen_vec_setv8hi;
44273 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44274 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44275 inner_mode = HImode;
44276 first_imode = V4SImode;
44277 second_imode = V2DImode;
44278 third_imode = VOIDmode;
44279 break;
44280 case V16QImode:
44281 gen_load_even = gen_vec_setv16qi;
44282 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44283 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44284 inner_mode = QImode;
44285 first_imode = V8HImode;
44286 second_imode = V4SImode;
44287 third_imode = V2DImode;
44288 break;
44289 default:
44290 gcc_unreachable ();
44293 for (i = 0; i < n; i++)
44295 /* Extend the odd element to SImode using a paradoxical SUBREG. */

44296 op0 = gen_reg_rtx (SImode);
44297 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44299 /* Insert the SImode value as low element of V4SImode vector. */
44300 op1 = gen_reg_rtx (V4SImode);
44301 op0 = gen_rtx_VEC_MERGE (V4SImode,
44302 gen_rtx_VEC_DUPLICATE (V4SImode,
44303 op0),
44304 CONST0_RTX (V4SImode),
44305 const1_rtx);
44306 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
44308 /* Cast the V4SImode vector back to a vector in the original mode. */
44309 op0 = gen_reg_rtx (mode);
44310 emit_move_insn (op0, gen_lowpart (mode, op1));
44312 /* Load even elements into the second position. */
44313 emit_insn (gen_load_even (op0,
44314 force_reg (inner_mode,
44315 ops [i + i + 1]),
44316 const1_rtx));
44318 /* Cast vector to FIRST_IMODE vector. */
44319 ops[i] = gen_reg_rtx (first_imode);
44320 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44323 /* Interleave low FIRST_IMODE vectors. */
44324 for (i = j = 0; i < n; i += 2, j++)
44326 op0 = gen_reg_rtx (first_imode);
44327 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44329 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44330 ops[j] = gen_reg_rtx (second_imode);
44331 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44334 /* Interleave low SECOND_IMODE vectors. */
44335 switch (second_imode)
44337 case V4SImode:
44338 for (i = j = 0; i < n / 2; i += 2, j++)
44340 op0 = gen_reg_rtx (second_imode);
44341 emit_insn (gen_interleave_second_low (op0, ops[i],
44342 ops[i + 1]));
44344 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44345 vector. */
44346 ops[j] = gen_reg_rtx (third_imode);
44347 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44349 second_imode = V2DImode;
44350 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44351 /* FALLTHRU */
44353 case V2DImode:
44354 op0 = gen_reg_rtx (second_imode);
44355 emit_insn (gen_interleave_second_low (op0, ops[0],
44356 ops[1]));
44358 /* Cast the SECOND_IMODE vector back to a vector on original
44359 mode. */
44360 emit_insn (gen_rtx_SET (VOIDmode, target,
44361 gen_lowpart (mode, op0)));
44362 break;
44364 default:
44365 gcc_unreachable ();
44369 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44370 all values variable, and none identical. */
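/* Wide modes are handled via the concatenation or interleaving helpers
   above; the remaining modes fall through to the code further below,
   which packs the elements into word_mode integers with shifts and
   IORs and assembles the vector from those words.  */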
44372 static void
44373 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44374 rtx target, rtx vals)
44376 rtx ops[64], op0, op1, op2, op3, op4, op5;
44377 machine_mode half_mode = VOIDmode;
44378 machine_mode quarter_mode = VOIDmode;
44379 int n, i;
44381 switch (mode)
44383 case V2SFmode:
44384 case V2SImode:
44385 if (!mmx_ok && !TARGET_SSE)
44386 break;
44387 /* FALLTHRU */
44389 case V16SImode:
44390 case V16SFmode:
44391 case V8DFmode:
44392 case V8DImode:
44393 case V8SFmode:
44394 case V8SImode:
44395 case V4DFmode:
44396 case V4DImode:
44397 case V4SFmode:
44398 case V4SImode:
44399 case V2DFmode:
44400 case V2DImode:
44401 n = GET_MODE_NUNITS (mode);
44402 for (i = 0; i < n; i++)
44403 ops[i] = XVECEXP (vals, 0, i);
44404 ix86_expand_vector_init_concat (mode, target, ops, n);
44405 return;
44407 case V32QImode:
44408 half_mode = V16QImode;
44409 goto half;
44411 case V16HImode:
44412 half_mode = V8HImode;
44413 goto half;
44415 half:
44416 n = GET_MODE_NUNITS (mode);
44417 for (i = 0; i < n; i++)
44418 ops[i] = XVECEXP (vals, 0, i);
44419 op0 = gen_reg_rtx (half_mode);
44420 op1 = gen_reg_rtx (half_mode);
44421 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44422 n >> 2);
44423 ix86_expand_vector_init_interleave (half_mode, op1,
44424 &ops [n >> 1], n >> 2);
44425 emit_insn (gen_rtx_SET (VOIDmode, target,
44426 gen_rtx_VEC_CONCAT (mode, op0, op1)));
44427 return;
44429 case V64QImode:
44430 quarter_mode = V16QImode;
44431 half_mode = V32QImode;
44432 goto quarter;
44434 case V32HImode:
44435 quarter_mode = V8HImode;
44436 half_mode = V16HImode;
44437 goto quarter;
44439 quarter:
44440 n = GET_MODE_NUNITS (mode);
44441 for (i = 0; i < n; i++)
44442 ops[i] = XVECEXP (vals, 0, i);
44443 op0 = gen_reg_rtx (quarter_mode);
44444 op1 = gen_reg_rtx (quarter_mode);
44445 op2 = gen_reg_rtx (quarter_mode);
44446 op3 = gen_reg_rtx (quarter_mode);
44447 op4 = gen_reg_rtx (half_mode);
44448 op5 = gen_reg_rtx (half_mode);
44449 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44450 n >> 3);
44451 ix86_expand_vector_init_interleave (quarter_mode, op1,
44452 &ops [n >> 2], n >> 3);
44453 ix86_expand_vector_init_interleave (quarter_mode, op2,
44454 &ops [n >> 1], n >> 3);
44455 ix86_expand_vector_init_interleave (quarter_mode, op3,
44456 &ops [(n >> 1) | (n >> 2)], n >> 3);
44457 emit_insn (gen_rtx_SET (VOIDmode, op4,
44458 gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44459 emit_insn (gen_rtx_SET (VOIDmode, op5,
44460 gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44461 emit_insn (gen_rtx_SET (VOIDmode, target,
44462 gen_rtx_VEC_CONCAT (mode, op4, op5)));
44463 return;
44465 case V16QImode:
44466 if (!TARGET_SSE4_1)
44467 break;
44468 /* FALLTHRU */
44470 case V8HImode:
44471 if (!TARGET_SSE2)
44472 break;
44474 /* Don't use ix86_expand_vector_init_interleave if we can't
44475 move from GPR to SSE register directly. */
44476 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44477 break;
44479 n = GET_MODE_NUNITS (mode);
44480 for (i = 0; i < n; i++)
44481 ops[i] = XVECEXP (vals, 0, i);
44482 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44483 return;
44485 case V4HImode:
44486 case V8QImode:
44487 break;
44489 default:
44490 gcc_unreachable ();
44494 int i, j, n_elts, n_words, n_elt_per_word;
44495 machine_mode inner_mode;
44496 rtx words[4], shift;
44498 inner_mode = GET_MODE_INNER (mode);
44499 n_elts = GET_MODE_NUNITS (mode);
44500 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44501 n_elt_per_word = n_elts / n_words;
44502 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44504 for (i = 0; i < n_words; ++i)
44506 rtx word = NULL_RTX;
44508 for (j = 0; j < n_elt_per_word; ++j)
44510 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44511 elt = convert_modes (word_mode, inner_mode, elt, true);
44513 if (j == 0)
44514 word = elt;
44515 else
44517 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44518 word, 1, OPTAB_LIB_WIDEN);
44519 word = expand_simple_binop (word_mode, IOR, word, elt,
44520 word, 1, OPTAB_LIB_WIDEN);
44524 words[i] = word;
44527 if (n_words == 1)
44528 emit_move_insn (target, gen_lowpart (mode, words[0]));
44529 else if (n_words == 2)
44531 rtx tmp = gen_reg_rtx (mode);
44532 emit_clobber (tmp);
44533 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44534 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44535 emit_move_insn (target, tmp);
44537 else if (n_words == 4)
44539 rtx tmp = gen_reg_rtx (V4SImode);
44540 gcc_assert (word_mode == SImode);
44541 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44542 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44543 emit_move_insn (target, gen_lowpart (mode, tmp));
44545 else
44546 gcc_unreachable ();
44550 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44551 instructions unless MMX_OK is true. */
44553 void
44554 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44556 machine_mode mode = GET_MODE (target);
44557 machine_mode inner_mode = GET_MODE_INNER (mode);
44558 int n_elts = GET_MODE_NUNITS (mode);
44559 int n_var = 0, one_var = -1;
44560 bool all_same = true, all_const_zero = true;
44561 int i;
44562 rtx x;
44564 for (i = 0; i < n_elts; ++i)
44566 x = XVECEXP (vals, 0, i);
44567 if (!(CONST_SCALAR_INT_P (x)
44568 || CONST_DOUBLE_P (x)
44569 || CONST_FIXED_P (x)))
44570 n_var++, one_var = i;
44571 else if (x != CONST0_RTX (inner_mode))
44572 all_const_zero = false;
44573 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44574 all_same = false;
44577 /* Constants are best loaded from the constant pool. */
44578 if (n_var == 0)
44580 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44581 return;
44584 /* If all values are identical, broadcast the value. */
44585 if (all_same
44586 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44587 XVECEXP (vals, 0, 0)))
44588 return;
44590 /* Values where only one field is non-constant are best loaded from
44591 the pool and overwritten via move later. */
44592 if (n_var == 1)
44594 if (all_const_zero
44595 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44596 XVECEXP (vals, 0, one_var),
44597 one_var))
44598 return;
44600 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44601 return;
44604 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
44607 void
44608 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44610 machine_mode mode = GET_MODE (target);
44611 machine_mode inner_mode = GET_MODE_INNER (mode);
44612 machine_mode half_mode;
44613 bool use_vec_merge = false;
44614 rtx tmp;
44615 static rtx (*gen_extract[6][2]) (rtx, rtx)
44617 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44618 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44619 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44620 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44621 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44622 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44624 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44626 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44627 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44628 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44629 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44630 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44631 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44633 int i, j, n;
44635 switch (mode)
44637 case V2SFmode:
44638 case V2SImode:
44639 if (mmx_ok)
44641 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44642 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44643 if (elt == 0)
44644 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44645 else
44646 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44647 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44648 return;
44650 break;
44652 case V2DImode:
44653 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44654 if (use_vec_merge)
44655 break;
44657 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44658 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44659 if (elt == 0)
44660 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44661 else
44662 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44663 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44664 return;
44666 case V2DFmode:
44668 rtx op0, op1;
44670 /* For the two element vectors, we implement a VEC_CONCAT with
44671 the extraction of the other element. */
44673 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44674 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44676 if (elt == 0)
44677 op0 = val, op1 = tmp;
44678 else
44679 op0 = tmp, op1 = val;
44681 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44682 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44684 return;
44686 case V4SFmode:
44687 use_vec_merge = TARGET_SSE4_1;
44688 if (use_vec_merge)
44689 break;
44691 switch (elt)
44693 case 0:
44694 use_vec_merge = true;
44695 break;
44697 case 1:
44698 /* tmp = target = A B C D */
44699 tmp = copy_to_reg (target);
44700 /* target = A A B B */
44701 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44702 /* target = X A B B */
44703 ix86_expand_vector_set (false, target, val, 0);
44704 /* target = A X C D */
44705 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44706 const1_rtx, const0_rtx,
44707 GEN_INT (2+4), GEN_INT (3+4)));
44708 return;
44710 case 2:
44711 /* tmp = target = A B C D */
44712 tmp = copy_to_reg (target);
44713 /* tmp = X B C D */
44714 ix86_expand_vector_set (false, tmp, val, 0);
44715 /* target = A B X D */
44716 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44717 const0_rtx, const1_rtx,
44718 GEN_INT (0+4), GEN_INT (3+4)));
44719 return;
44721 case 3:
44722 /* tmp = target = A B C D */
44723 tmp = copy_to_reg (target);
44724 /* tmp = X B C D */
44725 ix86_expand_vector_set (false, tmp, val, 0);
44726 /* target = A B C X */
44727 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44728 const0_rtx, const1_rtx,
44729 GEN_INT (2+4), GEN_INT (0+4)));
44730 return;
44732 default:
44733 gcc_unreachable ();
44735 break;
44737 case V4SImode:
44738 use_vec_merge = TARGET_SSE4_1;
44739 if (use_vec_merge)
44740 break;
44742 /* Element 0 handled by vec_merge below. */
44743 if (elt == 0)
44745 use_vec_merge = true;
44746 break;
44749 if (TARGET_SSE2)
44751 /* With SSE2, use integer shuffles to swap element 0 and ELT,
44752 store into element 0, then shuffle them back. */
44754 rtx order[4];
44756 order[0] = GEN_INT (elt);
44757 order[1] = const1_rtx;
44758 order[2] = const2_rtx;
44759 order[3] = GEN_INT (3);
44760 order[elt] = const0_rtx;
44762 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44763 order[1], order[2], order[3]));
44765 ix86_expand_vector_set (false, target, val, 0);
44767 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44768 order[1], order[2], order[3]));
44770 else
44772 /* For SSE1, we have to reuse the V4SF code. */
44773 rtx t = gen_reg_rtx (V4SFmode);
44774 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
44775 emit_move_insn (target, gen_lowpart (mode, t));
44777 return;
44779 case V8HImode:
44780 use_vec_merge = TARGET_SSE2;
44781 break;
44782 case V4HImode:
44783 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44784 break;
44786 case V16QImode:
44787 use_vec_merge = TARGET_SSE4_1;
44788 break;
44790 case V8QImode:
44791 break;
44793 case V32QImode:
44794 half_mode = V16QImode;
44795 j = 0;
44796 n = 16;
44797 goto half;
44799 case V16HImode:
44800 half_mode = V8HImode;
44801 j = 1;
44802 n = 8;
44803 goto half;
44805 case V8SImode:
44806 half_mode = V4SImode;
44807 j = 2;
44808 n = 4;
44809 goto half;
44811 case V4DImode:
44812 half_mode = V2DImode;
44813 j = 3;
44814 n = 2;
44815 goto half;
44817 case V8SFmode:
44818 half_mode = V4SFmode;
44819 j = 4;
44820 n = 4;
44821 goto half;
44823 case V4DFmode:
44824 half_mode = V2DFmode;
44825 j = 5;
44826 n = 2;
44827 goto half;
44829 half:
44830 /* Compute offset. */
44831 i = elt / n;
44832 elt %= n;
44834 gcc_assert (i <= 1);
44836 /* Extract the half. */
44837 tmp = gen_reg_rtx (half_mode);
44838 emit_insn (gen_extract[j][i] (tmp, target));
44840 /* Put val in tmp at elt. */
44841 ix86_expand_vector_set (false, tmp, val, elt);
44843 /* Put it back. */
44844 emit_insn (gen_insert[j][i] (target, target, tmp));
44845 return;
44847 case V8DFmode:
44848 if (TARGET_AVX512F)
44850 tmp = gen_reg_rtx (mode);
44851 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44852 gen_rtx_VEC_DUPLICATE (mode, val)));
44853 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
44854 force_reg (QImode, GEN_INT (1 << elt))));
44855 return;
44857 else
44858 break;
44859 case V8DImode:
44860 if (TARGET_AVX512F)
44862 tmp = gen_reg_rtx (mode);
44863 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44864 gen_rtx_VEC_DUPLICATE (mode, val)));
44865 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
44866 force_reg (QImode, GEN_INT (1 << elt))));
44867 return;
44869 else
44870 break;
44871 case V16SFmode:
44872 if (TARGET_AVX512F)
44874 tmp = gen_reg_rtx (mode);
44875 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44876 gen_rtx_VEC_DUPLICATE (mode, val)));
44877 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
44878 force_reg (HImode, GEN_INT (1 << elt))));
44879 return;
44881 else
44882 break;
44883 case V16SImode:
44884 if (TARGET_AVX512F)
44886 tmp = gen_reg_rtx (mode);
44887 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44888 gen_rtx_VEC_DUPLICATE (mode, val)));
44889 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
44890 force_reg (HImode, GEN_INT (1 << elt))));
44891 return;
44893 else
44894 break;
44895 case V32HImode:
44896 if (TARGET_AVX512F && TARGET_AVX512BW)
44898 tmp = gen_reg_rtx (mode);
44899 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44900 gen_rtx_VEC_DUPLICATE (mode, val)));
44901 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
44902 force_reg (SImode, GEN_INT (1 << elt))));
44903 return;
44905 else
44906 break;
44907 case V64QImode:
44908 if (TARGET_AVX512F && TARGET_AVX512BW)
44910 tmp = gen_reg_rtx (mode);
44911 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44912 gen_rtx_VEC_DUPLICATE (mode, val)));
44913 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
44914 force_reg (DImode, GEN_INT (1 << elt))));
44915 return;
44917 else
44918 break;
44920 default:
44921 break;
44924 if (use_vec_merge)
44926 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
44927 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
44928 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44930 else
44932 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44934 emit_move_insn (mem, target);
44936 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
44937 emit_move_insn (tmp, val);
44939 emit_move_insn (target, mem);
44943 void
44944 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
44946 machine_mode mode = GET_MODE (vec);
44947 machine_mode inner_mode = GET_MODE_INNER (mode);
44948 bool use_vec_extr = false;
44949 rtx tmp;
44951 switch (mode)
44953 case V2SImode:
44954 case V2SFmode:
44955 if (!mmx_ok)
44956 break;
44957 /* FALLTHRU */
44959 case V2DFmode:
44960 case V2DImode:
44961 use_vec_extr = true;
44962 break;
44964 case V4SFmode:
44965 use_vec_extr = TARGET_SSE4_1;
44966 if (use_vec_extr)
44967 break;
44969 switch (elt)
44971 case 0:
44972 tmp = vec;
44973 break;
44975 case 1:
44976 case 3:
44977 tmp = gen_reg_rtx (mode);
44978 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
44979 GEN_INT (elt), GEN_INT (elt),
44980 GEN_INT (elt+4), GEN_INT (elt+4)));
44981 break;
44983 case 2:
44984 tmp = gen_reg_rtx (mode);
44985 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
44986 break;
44988 default:
44989 gcc_unreachable ();
44991 vec = tmp;
44992 use_vec_extr = true;
44993 elt = 0;
44994 break;
44996 case V4SImode:
44997 use_vec_extr = TARGET_SSE4_1;
44998 if (use_vec_extr)
44999 break;
45001 if (TARGET_SSE2)
45003 switch (elt)
45005 case 0:
45006 tmp = vec;
45007 break;
45009 case 1:
45010 case 3:
45011 tmp = gen_reg_rtx (mode);
45012 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
45013 GEN_INT (elt), GEN_INT (elt),
45014 GEN_INT (elt), GEN_INT (elt)));
45015 break;
45017 case 2:
45018 tmp = gen_reg_rtx (mode);
45019 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
45020 break;
45022 default:
45023 gcc_unreachable ();
45025 vec = tmp;
45026 use_vec_extr = true;
45027 elt = 0;
45029 else
45031 /* For SSE1, we have to reuse the V4SF code. */
45032 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
45033 gen_lowpart (V4SFmode, vec), elt);
45034 return;
45036 break;
45038 case V8HImode:
45039 use_vec_extr = TARGET_SSE2;
45040 break;
45041 case V4HImode:
45042 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45043 break;
45045 case V16QImode:
45046 use_vec_extr = TARGET_SSE4_1;
45047 break;
45049 case V8SFmode:
45050 if (TARGET_AVX)
45052 tmp = gen_reg_rtx (V4SFmode);
45053 if (elt < 4)
45054 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
45055 else
45056 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
45057 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45058 return;
45060 break;
45062 case V4DFmode:
45063 if (TARGET_AVX)
45065 tmp = gen_reg_rtx (V2DFmode);
45066 if (elt < 2)
45067 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
45068 else
45069 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
45070 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45071 return;
45073 break;
45075 case V32QImode:
45076 if (TARGET_AVX)
45078 tmp = gen_reg_rtx (V16QImode);
45079 if (elt < 16)
45080 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45081 else
45082 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45083 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45084 return;
45086 break;
45088 case V16HImode:
45089 if (TARGET_AVX)
45091 tmp = gen_reg_rtx (V8HImode);
45092 if (elt < 8)
45093 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45094 else
45095 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45096 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45097 return;
45099 break;
45101 case V8SImode:
45102 if (TARGET_AVX)
45104 tmp = gen_reg_rtx (V4SImode);
45105 if (elt < 4)
45106 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45107 else
45108 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45109 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45110 return;
45112 break;
45114 case V4DImode:
45115 if (TARGET_AVX)
45117 tmp = gen_reg_rtx (V2DImode);
45118 if (elt < 2)
45119 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45120 else
45121 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45122 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45123 return;
45125 break;
45127 case V32HImode:
45128 if (TARGET_AVX512BW)
45130 tmp = gen_reg_rtx (V16HImode);
45131 if (elt < 16)
45132 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45133 else
45134 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45135 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45136 return;
45138 break;
45140 case V64QImode:
45141 if (TARGET_AVX512BW)
45143 tmp = gen_reg_rtx (V32QImode);
45144 if (elt < 32)
45145 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45146 else
45147 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45148 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45149 return;
45151 break;
45153 case V16SFmode:
45154 tmp = gen_reg_rtx (V8SFmode);
45155 if (elt < 8)
45156 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45157 else
45158 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45159 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45160 return;
45162 case V8DFmode:
45163 tmp = gen_reg_rtx (V4DFmode);
45164 if (elt < 4)
45165 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45166 else
45167 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45168 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45169 return;
45171 case V16SImode:
45172 tmp = gen_reg_rtx (V8SImode);
45173 if (elt < 8)
45174 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45175 else
45176 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45177 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45178 return;
45180 case V8DImode:
45181 tmp = gen_reg_rtx (V4DImode);
45182 if (elt < 4)
45183 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45184 else
45185 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45186 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45187 return;
45189 case V8QImode:
45190 /* ??? Could extract the appropriate HImode element and shift. */
45191 default:
45192 break;
45195 if (use_vec_extr)
45197 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45198 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45200 /* Let the rtl optimizers know about the zero extension performed. */
45201 if (inner_mode == QImode || inner_mode == HImode)
45203 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45204 target = gen_lowpart (SImode, target);
45207 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45209 else
45211 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45213 emit_move_insn (mem, vec);
45215 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45216 emit_move_insn (target, tmp);
45220 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45221 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45222 The upper bits of DEST are undefined, though they shouldn't cause
45223 exceptions (some bits from src or all zeros are ok). */
45225 static void
45226 emit_reduc_half (rtx dest, rtx src, int i)
45228 rtx tem, d = dest;
45229 switch (GET_MODE (src))
45231 case V4SFmode:
45232 if (i == 128)
45233 tem = gen_sse_movhlps (dest, src, src);
45234 else
45235 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45236 GEN_INT (1 + 4), GEN_INT (1 + 4));
45237 break;
45238 case V2DFmode:
45239 tem = gen_vec_interleave_highv2df (dest, src, src);
45240 break;
45241 case V16QImode:
45242 case V8HImode:
45243 case V4SImode:
45244 case V2DImode:
45245 d = gen_reg_rtx (V1TImode);
45246 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45247 GEN_INT (i / 2));
45248 break;
45249 case V8SFmode:
45250 if (i == 256)
45251 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45252 else
45253 tem = gen_avx_shufps256 (dest, src, src,
45254 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45255 break;
45256 case V4DFmode:
45257 if (i == 256)
45258 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45259 else
45260 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45261 break;
45262 case V32QImode:
45263 case V16HImode:
45264 case V8SImode:
45265 case V4DImode:
45266 if (i == 256)
45268 if (GET_MODE (dest) != V4DImode)
45269 d = gen_reg_rtx (V4DImode);
45270 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45271 gen_lowpart (V4DImode, src),
45272 const1_rtx);
45274 else
45276 d = gen_reg_rtx (V2TImode);
45277 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
45278 GEN_INT (i / 2));
45280 break;
45281 case V64QImode:
45282 case V32HImode:
45283 case V16SImode:
45284 case V16SFmode:
45285 case V8DImode:
45286 case V8DFmode:
45287 if (i > 128)
45288 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45289 gen_lowpart (V16SImode, src),
45290 gen_lowpart (V16SImode, src),
45291 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45292 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45293 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45294 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45295 GEN_INT (0xC), GEN_INT (0xD),
45296 GEN_INT (0xE), GEN_INT (0xF),
45297 GEN_INT (0x10), GEN_INT (0x11),
45298 GEN_INT (0x12), GEN_INT (0x13),
45299 GEN_INT (0x14), GEN_INT (0x15),
45300 GEN_INT (0x16), GEN_INT (0x17));
45301 else
45302 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45303 gen_lowpart (V16SImode, src),
45304 GEN_INT (i == 128 ? 0x2 : 0x1),
45305 GEN_INT (0x3),
45306 GEN_INT (0x3),
45307 GEN_INT (0x3),
45308 GEN_INT (i == 128 ? 0x6 : 0x5),
45309 GEN_INT (0x7),
45310 GEN_INT (0x7),
45311 GEN_INT (0x7),
45312 GEN_INT (i == 128 ? 0xA : 0x9),
45313 GEN_INT (0xB),
45314 GEN_INT (0xB),
45315 GEN_INT (0xB),
45316 GEN_INT (i == 128 ? 0xE : 0xD),
45317 GEN_INT (0xF),
45318 GEN_INT (0xF),
45319 GEN_INT (0xF));
45320 break;
45321 default:
45322 gcc_unreachable ();
45324 emit_insn (tem);
45325 if (d != dest)
45326 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45329 /* Expand a vector reduction. FN is the binary pattern to reduce;
45330 DEST is the destination; IN is the input vector. */
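/* E.g. for a V4SImode input the loop below runs with i = 128 and
   i = 64: first the upper 64 bits are brought down and combined with
   the lower half, then the upper 32 bits of that partial result,
   leaving the reduced value in element 0 of DEST (the remaining
   elements are unspecified).  */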
45332 void
45333 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45335 rtx half, dst, vec = in;
45336 machine_mode mode = GET_MODE (in);
45337 int i;
45339 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
45340 if (TARGET_SSE4_1
45341 && mode == V8HImode
45342 && fn == gen_uminv8hi3)
45344 emit_insn (gen_sse4_1_phminposuw (dest, in));
45345 return;
45348 for (i = GET_MODE_BITSIZE (mode);
45349 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45350 i >>= 1)
45352 half = gen_reg_rtx (mode);
45353 emit_reduc_half (half, vec, i);
45354 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45355 dst = dest;
45356 else
45357 dst = gen_reg_rtx (mode);
45358 emit_insn (fn (dst, half, vec));
45359 vec = dst;
45363 /* Target hook for scalar_mode_supported_p. */
45364 static bool
45365 ix86_scalar_mode_supported_p (machine_mode mode)
45367 if (DECIMAL_FLOAT_MODE_P (mode))
45368 return default_decimal_float_supported_p ();
45369 else if (mode == TFmode)
45370 return true;
45371 else
45372 return default_scalar_mode_supported_p (mode);
45375 /* Implements target hook vector_mode_supported_p. */
45376 static bool
45377 ix86_vector_mode_supported_p (machine_mode mode)
45379 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45380 return true;
45381 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45382 return true;
45383 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45384 return true;
45385 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45386 return true;
45387 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45388 return true;
45389 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45390 return true;
45391 return false;
45394 /* Implement target hook libgcc_floating_mode_supported_p. */
45395 static bool
45396 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45398 switch (mode)
45400 case SFmode:
45401 case DFmode:
45402 case XFmode:
45403 return true;
45405 case TFmode:
45406 #ifdef IX86_NO_LIBGCC_TFMODE
45407 return false;
45408 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45409 return TARGET_LONG_DOUBLE_128;
45410 #else
45411 return true;
45412 #endif
45414 default:
45415 return false;
45419 /* Target hook for c_mode_for_suffix. */
45420 static machine_mode
45421 ix86_c_mode_for_suffix (char suffix)
45423 if (suffix == 'q')
45424 return TFmode;
45425 if (suffix == 'w')
45426 return XFmode;
45428 return VOIDmode;
45431 /* Worker function for TARGET_MD_ASM_CLOBBERS.
45433 We do this in the new i386 backend to maintain source compatibility
45434 with the old cc0-based compiler. */
45436 static tree
45437 ix86_md_asm_clobbers (tree, tree, tree clobbers)
45439 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
45440 clobbers);
45441 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
45442 clobbers);
45443 return clobbers;
45446 /* Implements the target hook targetm.asm.encode_section_info. */
45448 static void ATTRIBUTE_UNUSED
45449 ix86_encode_section_info (tree decl, rtx rtl, int first)
45451 default_encode_section_info (decl, rtl, first);
45453 if (ix86_in_large_data_p (decl))
45454 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45457 /* Worker function for REVERSE_CONDITION. */
45459 enum rtx_code
45460 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45462 return (mode != CCFPmode && mode != CCFPUmode
45463 ? reverse_condition (code)
45464 : reverse_condition_maybe_unordered (code));
45467 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45468 to OPERANDS[0]. */
45470 const char *
45471 output_387_reg_move (rtx insn, rtx *operands)
45473 if (REG_P (operands[0]))
45475 if (REG_P (operands[1])
45476 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45478 if (REGNO (operands[0]) == FIRST_STACK_REG)
45479 return output_387_ffreep (operands, 0);
45480 return "fstp\t%y0";
45482 if (STACK_TOP_P (operands[0]))
45483 return "fld%Z1\t%y1";
45484 return "fst\t%y0";
45486 else if (MEM_P (operands[0]))
45488 gcc_assert (REG_P (operands[1]));
45489 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45490 return "fstp%Z0\t%y0";
45491 else
45493 /* There is no non-popping store to memory for XFmode.
45494 So if we need one, follow the store with a load. */
45495 if (GET_MODE (operands[0]) == XFmode)
45496 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45497 else
45498 return "fst%Z0\t%y0";
45501 else
45502 gcc_unreachable ();
45505 /* Output code to perform a conditional jump to LABEL, if C2 flag in
45506 FP status register is set. */
45508 void
45509 ix86_emit_fp_unordered_jump (rtx label)
45511 rtx reg = gen_reg_rtx (HImode);
45512 rtx temp;
45514 emit_insn (gen_x86_fnstsw_1 (reg));
45516 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45518 emit_insn (gen_x86_sahf_1 (reg));
45520 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45521 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45523 else
45525 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45527 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45528 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45531 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45532 gen_rtx_LABEL_REF (VOIDmode, label),
45533 pc_rtx);
45534 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
45536 emit_jump_insn (temp);
45537 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45540 /* Output code to perform a log1p XFmode calculation. */
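/* fyl2xp1 is only defined for |x| < 1 - sqrt(2)/2, so arguments at or
   above that threshold (~0.2928932) take the branch below that
   computes log1p (x) as fyl2x applied to 1 + x.  */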
45542 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45544 rtx_code_label *label1 = gen_label_rtx ();
45545 rtx_code_label *label2 = gen_label_rtx ();
45547 rtx tmp = gen_reg_rtx (XFmode);
45548 rtx tmp2 = gen_reg_rtx (XFmode);
45549 rtx test;
45551 emit_insn (gen_absxf2 (tmp, op1));
45552 test = gen_rtx_GE (VOIDmode, tmp,
45553 CONST_DOUBLE_FROM_REAL_VALUE (
45554 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45555 XFmode));
45556 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
45558 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45559 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45560 emit_jump (label2);
45562 emit_label (label1);
45563 emit_move_insn (tmp, CONST1_RTX (XFmode));
45564 emit_insn (gen_addxf3 (tmp, op1, tmp));
45565 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45566 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45568 emit_label (label2);
45571 /* Emit code for round calculation. */
45572 void ix86_emit_i387_round (rtx op0, rtx op1)
45574 machine_mode inmode = GET_MODE (op1);
45575 machine_mode outmode = GET_MODE (op0);
45576 rtx e1, e2, res, tmp, tmp1, half;
45577 rtx scratch = gen_reg_rtx (HImode);
45578 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45579 rtx_code_label *jump_label = gen_label_rtx ();
45580 rtx insn;
45581 rtx (*gen_abs) (rtx, rtx);
45582 rtx (*gen_neg) (rtx, rtx);
45584 switch (inmode)
45586 case SFmode:
45587 gen_abs = gen_abssf2;
45588 break;
45589 case DFmode:
45590 gen_abs = gen_absdf2;
45591 break;
45592 case XFmode:
45593 gen_abs = gen_absxf2;
45594 break;
45595 default:
45596 gcc_unreachable ();
45599 switch (outmode)
45601 case SFmode:
45602 gen_neg = gen_negsf2;
45603 break;
45604 case DFmode:
45605 gen_neg = gen_negdf2;
45606 break;
45607 case XFmode:
45608 gen_neg = gen_negxf2;
45609 break;
45610 case HImode:
45611 gen_neg = gen_neghi2;
45612 break;
45613 case SImode:
45614 gen_neg = gen_negsi2;
45615 break;
45616 case DImode:
45617 gen_neg = gen_negdi2;
45618 break;
45619 default:
45620 gcc_unreachable ();
45623 e1 = gen_reg_rtx (inmode);
45624 e2 = gen_reg_rtx (inmode);
45625 res = gen_reg_rtx (outmode);
45627 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45629 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
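/* Halfway cases round away from zero under this definition, e.g.
   round (2.5) -> 3 and round (-2.5) -> -3, which is why the sign is
   reapplied only after flooring fabs(a) + 0.5.  */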
45631 /* scratch = fxam(op1) */
45632 emit_insn (gen_rtx_SET (VOIDmode, scratch,
45633 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45634 UNSPEC_FXAM)));
45635 /* e1 = fabs(op1) */
45636 emit_insn (gen_abs (e1, op1));
45638 /* e2 = e1 + 0.5 */
45639 half = force_reg (inmode, half);
45640 emit_insn (gen_rtx_SET (VOIDmode, e2,
45641 gen_rtx_PLUS (inmode, e1, half)));
45643 /* res = floor(e2) */
45644 if (inmode != XFmode)
45646 tmp1 = gen_reg_rtx (XFmode);
45648 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
45649 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
45651 else
45652 tmp1 = e2;
45654 switch (outmode)
45656 case SFmode:
45657 case DFmode:
45659 rtx tmp0 = gen_reg_rtx (XFmode);
45661 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
45663 emit_insn (gen_rtx_SET (VOIDmode, res,
45664 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
45665 UNSPEC_TRUNC_NOOP)));
45667 break;
45668 case XFmode:
45669 emit_insn (gen_frndintxf2_floor (res, tmp1));
45670 break;
45671 case HImode:
45672 emit_insn (gen_lfloorxfhi2 (res, tmp1));
45673 break;
45674 case SImode:
45675 emit_insn (gen_lfloorxfsi2 (res, tmp1));
45676 break;
45677 case DImode:
45678 emit_insn (gen_lfloorxfdi2 (res, tmp1));
45679 break;
45680 default:
45681 gcc_unreachable ();
45684 /* flags = signbit(a) */
45685 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
45687 /* if (flags) then res = -res */
45688 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
45689 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
45690 gen_rtx_LABEL_REF (VOIDmode, jump_label),
45691 pc_rtx);
45692 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45693 predict_jump (REG_BR_PROB_BASE * 50 / 100);
45694 JUMP_LABEL (insn) = jump_label;
45696 emit_insn (gen_neg (res, res));
45698 emit_label (jump_label);
45699 LABEL_NUSES (jump_label) = 1;
45701 emit_move_insn (op0, res);
45704 /* Output code to perform a Newton-Raphson approximation of a single precision
45705 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
45707 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
45709 rtx x0, x1, e0, e1;
45711 x0 = gen_reg_rtx (mode);
45712 e0 = gen_reg_rtx (mode);
45713 e1 = gen_reg_rtx (mode);
45714 x1 = gen_reg_rtx (mode);
45716 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
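/* This is one Newton-Raphson step for 1/b starting from x0 = rcp(b):
   x1 = x0 * (2 - b * x0) = (x0 + x0) - (b * x0 * x0), followed by
   res = a * x1.  */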
45718 b = force_reg (mode, b);
45720 /* x0 = rcp(b) estimate */
45721 if (mode == V16SFmode || mode == V8DFmode)
45722 emit_insn (gen_rtx_SET (VOIDmode, x0,
45723 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45724 UNSPEC_RCP14)));
45725 else
45726 emit_insn (gen_rtx_SET (VOIDmode, x0,
45727 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45728 UNSPEC_RCP)));
45730 /* e0 = x0 * b */
45731 emit_insn (gen_rtx_SET (VOIDmode, e0,
45732 gen_rtx_MULT (mode, x0, b)));
45734 /* e0 = x0 * e0 */
45735 emit_insn (gen_rtx_SET (VOIDmode, e0,
45736 gen_rtx_MULT (mode, x0, e0)));
45738 /* e1 = x0 + x0 */
45739 emit_insn (gen_rtx_SET (VOIDmode, e1,
45740 gen_rtx_PLUS (mode, x0, x0)));
45742 /* x1 = e1 - e0 */
45743 emit_insn (gen_rtx_SET (VOIDmode, x1,
45744 gen_rtx_MINUS (mode, e1, e0)));
45746 /* res = a * x1 */
45747 emit_insn (gen_rtx_SET (VOIDmode, res,
45748 gen_rtx_MULT (mode, a, x1)));
45751 /* Output code to perform a Newton-Raphson approximation of a
45752 single precision floating point [reciprocal] square root. */
45754 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
45755 bool recip)
45757 rtx x0, e0, e1, e2, e3, mthree, mhalf;
45758 REAL_VALUE_TYPE r;
45759 int unspec;
45761 x0 = gen_reg_rtx (mode);
45762 e0 = gen_reg_rtx (mode);
45763 e1 = gen_reg_rtx (mode);
45764 e2 = gen_reg_rtx (mode);
45765 e3 = gen_reg_rtx (mode);
45767 real_from_integer (&r, VOIDmode, -3, SIGNED);
45768 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45770 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45771 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45772 unspec = UNSPEC_RSQRT;
45774 if (VECTOR_MODE_P (mode))
45776 mthree = ix86_build_const_vector (mode, true, mthree);
45777 mhalf = ix86_build_const_vector (mode, true, mhalf);
45778 /* There is no 512-bit rsqrt. There is however rsqrt14. */
45779 if (GET_MODE_SIZE (mode) == 64)
45780 unspec = UNSPEC_RSQRT14;
45783 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45784 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
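/* Both forms are one Newton-Raphson step for 1/sqrt(a) starting from
   x0 = rsqrtss(a): x1 = 0.5 * x0 * (3 - a * x0 * x0), rewritten using
   the -3 and -0.5 constants set up above; the sqrt variant folds in an
   extra multiply by a, since sqrt(a) = a * rsqrt(a).  */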
45786 a = force_reg (mode, a);
45788 /* x0 = rsqrt(a) estimate */
45789 emit_insn (gen_rtx_SET (VOIDmode, x0,
45790 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45791 unspec)));
45793 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
45794 if (!recip)
45796 rtx zero, mask;
45798 zero = gen_reg_rtx (mode);
45799 mask = gen_reg_rtx (mode);
45801 zero = force_reg (mode, CONST0_RTX(mode));
45803 /* Handle masked compare. */
45804 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
45806 mask = gen_reg_rtx (HImode);
45807 /* Imm value 0x4 corresponds to not-equal comparison. */
45808 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45809 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
45811 else
45813 emit_insn (gen_rtx_SET (VOIDmode, mask,
45814 gen_rtx_NE (mode, zero, a)));
45816 emit_insn (gen_rtx_SET (VOIDmode, x0,
45817 gen_rtx_AND (mode, x0, mask)));
45821 /* e0 = x0 * a */
45822 emit_insn (gen_rtx_SET (VOIDmode, e0,
45823 gen_rtx_MULT (mode, x0, a)));
45824 /* e1 = e0 * x0 */
45825 emit_insn (gen_rtx_SET (VOIDmode, e1,
45826 gen_rtx_MULT (mode, e0, x0)));
45828 /* e2 = e1 - 3. */
45829 mthree = force_reg (mode, mthree);
45830 emit_insn (gen_rtx_SET (VOIDmode, e2,
45831 gen_rtx_PLUS (mode, e1, mthree)));
45833 mhalf = force_reg (mode, mhalf);
45834 if (recip)
45835 /* e3 = -.5 * x0 */
45836 emit_insn (gen_rtx_SET (VOIDmode, e3,
45837 gen_rtx_MULT (mode, x0, mhalf)));
45838 else
45839 /* e3 = -.5 * e0 */
45840 emit_insn (gen_rtx_SET (VOIDmode, e3,
45841 gen_rtx_MULT (mode, e0, mhalf)));
45842 /* ret = e2 * e3 */
45843 emit_insn (gen_rtx_SET (VOIDmode, res,
45844 gen_rtx_MULT (mode, e2, e3)));
45847 #ifdef TARGET_SOLARIS
45848 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
45850 static void
45851 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45852 tree decl)
45854 /* With Binutils 2.15, the "@unwind" marker must be specified on
45855 every occurrence of the ".eh_frame" section, not just the first
45856 one. */
45857 if (TARGET_64BIT
45858 && strcmp (name, ".eh_frame") == 0)
45860 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45861 flags & SECTION_WRITE ? "aw" : "a");
45862 return;
45865 #ifndef USE_GAS
45866 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45868 solaris_elf_asm_comdat_section (name, flags, decl);
45869 return;
45871 #endif
45873 default_elf_asm_named_section (name, flags, decl);
45875 #endif /* TARGET_SOLARIS */
45877 /* Return the mangling of TYPE if it is an extended fundamental type. */
45879 static const char *
45880 ix86_mangle_type (const_tree type)
45882 type = TYPE_MAIN_VARIANT (type);
45884 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45885 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45886 return NULL;
45888 switch (TYPE_MODE (type))
45890 case TFmode:
45891 /* __float128 is "g". */
45892 return "g";
45893 case XFmode:
45894 /* "long double" or __float80 is "e". */
45895 return "e";
45896 default:
45897 return NULL;
45901 /* For 32-bit code we can save PIC register setup by using
45902 __stack_chk_fail_local hidden function instead of calling
45903 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
45904 register, so it is better to call __stack_chk_fail directly. */
45906 static tree ATTRIBUTE_UNUSED
45907 ix86_stack_protect_fail (void)
45909 return TARGET_64BIT
45910 ? default_external_stack_protect_fail ()
45911 : default_hidden_stack_protect_fail ();
45914 /* Select a format to encode pointers in exception handling data. CODE
45915 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
45916 true if the symbol may be affected by dynamic relocations.
45918 ??? All x86 object file formats are capable of representing this.
45919 After all, the relocation needed is the same as for the call insn.
45920 Whether or not a particular assembler allows us to enter such, I
45921 guess we'll have to see. */
45923 asm_preferred_eh_data_format (int code, int global)
45925 if (flag_pic)
45927 int type = DW_EH_PE_sdata8;
45928 if (!TARGET_64BIT
45929 || ix86_cmodel == CM_SMALL_PIC
45930 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
45931 type = DW_EH_PE_sdata4;
45932 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
45934 if (ix86_cmodel == CM_SMALL
45935 || (ix86_cmodel == CM_MEDIUM && code))
45936 return DW_EH_PE_udata4;
45937 return DW_EH_PE_absptr;
45940 /* Expand copysign from SIGN to the positive value ABS_VALUE
45941 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
45942 the sign-bit. */
45943 static void
45944 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
45946 machine_mode mode = GET_MODE (sign);
45947 rtx sgn = gen_reg_rtx (mode);
45948 if (mask == NULL_RTX)
45950 machine_mode vmode;
45952 if (mode == SFmode)
45953 vmode = V4SFmode;
45954 else if (mode == DFmode)
45955 vmode = V2DFmode;
45956 else
45957 vmode = mode;
45959 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
45960 if (!VECTOR_MODE_P (mode))
45962 /* We need to generate a scalar mode mask in this case. */
45963 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45964 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45965 mask = gen_reg_rtx (mode);
45966 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
45969 else
45970 mask = gen_rtx_NOT (mode, mask);
45971 emit_insn (gen_rtx_SET (VOIDmode, sgn,
45972 gen_rtx_AND (mode, mask, sign)));
45973 emit_insn (gen_rtx_SET (VOIDmode, result,
45974 gen_rtx_IOR (mode, abs_value, sgn)));
45977 /* Expand fabs (OP0) and return a new rtx that holds the result. The
45978 mask for masking out the sign-bit is stored in *SMASK, if that is
45979 non-null. */
45980 static rtx
45981 ix86_expand_sse_fabs (rtx op0, rtx *smask)
45983 machine_mode vmode, mode = GET_MODE (op0);
45984 rtx xa, mask;
45986 xa = gen_reg_rtx (mode);
45987 if (mode == SFmode)
45988 vmode = V4SFmode;
45989 else if (mode == DFmode)
45990 vmode = V2DFmode;
45991 else
45992 vmode = mode;
45993 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
45994 if (!VECTOR_MODE_P (mode))
45996 /* We need to generate a scalar mode mask in this case. */
45997 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45998 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45999 mask = gen_reg_rtx (mode);
46000 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
46002 emit_insn (gen_rtx_SET (VOIDmode, xa,
46003 gen_rtx_AND (mode, op0, mask)));
46005 if (smask)
46006 *smask = mask;
46008 return xa;
46011 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
46012 swapping the operands if SWAP_OPERANDS is true. The expanded
46013 code is a forward jump to a newly created label in case the
46014 comparison is true. The generated label rtx is returned. */
46015 static rtx_code_label *
46016 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
46017 bool swap_operands)
46019 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
46020 rtx_code_label *label;
46021 rtx tmp;
46023 if (swap_operands)
46024 std::swap (op0, op1);
46026 label = gen_label_rtx ();
46027 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
46028 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46029 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
46030 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
46031 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
46032 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
46033 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
46034 JUMP_LABEL (tmp) = label;
46036 return label;
46039 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
46040 using comparison code CODE. Operands are swapped for the comparison if
46041 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
46042 static rtx
46043 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
46044 bool swap_operands)
46046 rtx (*insn)(rtx, rtx, rtx, rtx);
46047 machine_mode mode = GET_MODE (op0);
46048 rtx mask = gen_reg_rtx (mode);
46050 if (swap_operands)
46051 std::swap (op0, op1);
46053 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
46055 emit_insn (insn (mask, op0, op1,
46056 gen_rtx_fmt_ee (code, mode, op0, op1)));
46057 return mask;
46060 /* Generate and return a rtx of mode MODE for 2**n where n is the number
46061 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
46062 static rtx
46063 ix86_gen_TWO52 (machine_mode mode)
46065 REAL_VALUE_TYPE TWO52r;
46066 rtx TWO52;
46068 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
46069 TWO52 = const_double_from_real_value (TWO52r, mode);
46070 TWO52 = force_reg (mode, TWO52);
46072 return TWO52;
46075 /* Expand SSE sequence for computing lround from OP1 storing
46076 into OP0. */
46077 void
46078 ix86_expand_lround (rtx op0, rtx op1)
46080 /* C code for the stuff we're doing below:
46081 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
46082 return (long)tmp;
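Using nextafter (0.5, 0.0) rather than 0.5 keeps values whose
fraction is just below one half (e.g. the largest double below 0.5)
from being rounded up to the next integer by the addition, while
exact halfway cases still round away from zero as lround requires.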
46084 machine_mode mode = GET_MODE (op1);
46085 const struct real_format *fmt;
46086 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46087 rtx adj;
46089 /* load nextafter (0.5, 0.0) */
46090 fmt = REAL_MODE_FORMAT (mode);
46091 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46092 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46094 /* adj = copysign (0.5, op1) */
46095 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46096 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46098 /* adj = op1 + adj */
46099 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
46101 /* op0 = (imode)adj */
46102 expand_fix (op0, adj, 0);
46105 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
46106 into OPERAND0. */
46107 void
46108 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46110 /* C code for the stuff we're doing below (for do_floor):
46111 xi = (long)op1;
46112 xi -= (double)xi > op1 ? 1 : 0;
46113 return xi;
46115 machine_mode fmode = GET_MODE (op1);
46116 machine_mode imode = GET_MODE (op0);
46117 rtx ireg, freg, tmp;
46118 rtx_code_label *label;
46120 /* reg = (long)op1 */
46121 ireg = gen_reg_rtx (imode);
46122 expand_fix (ireg, op1, 0);
46124 /* freg = (double)reg */
46125 freg = gen_reg_rtx (fmode);
46126 expand_float (freg, ireg, 0);
46128 /* ireg = (freg > op1) ? ireg - 1 : ireg */
46129 label = ix86_expand_sse_compare_and_jump (UNLE,
46130 freg, op1, !do_floor);
46131 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46132 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46133 emit_move_insn (ireg, tmp);
46135 emit_label (label);
46136 LABEL_NUSES (label) = 1;
46138 emit_move_insn (op0, ireg);
46141 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46142 result in OPERAND0. */
46143 void
46144 ix86_expand_rint (rtx operand0, rtx operand1)
46146 /* C code for the stuff we're doing below:
46147 xa = fabs (operand1);
46148 if (!isless (xa, 2**52))
46149 return operand1;
46150 xa = xa + 2**52 - 2**52;
46151 return copysign (xa, operand1);
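The 2**52 add/subtract trick works because for |xa| < 2**52 the sum
xa + 2**52 has no fraction bits left, so the addition itself rounds
xa to an integer in the current rounding mode and subtracting 2**52
back recovers that integer; larger magnitudes are already integral
and are returned unchanged.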
46153 machine_mode mode = GET_MODE (operand0);
46154 rtx res, xa, TWO52, mask;
46155 rtx_code_label *label;
46157 res = gen_reg_rtx (mode);
46158 emit_move_insn (res, operand1);
46160 /* xa = abs (operand1) */
46161 xa = ix86_expand_sse_fabs (res, &mask);
46163 /* if (!isless (xa, TWO52)) goto label; */
46164 TWO52 = ix86_gen_TWO52 (mode);
46165 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46167 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46168 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46170 ix86_sse_copysign_to_positive (res, xa, res, mask);
46172 emit_label (label);
46173 LABEL_NUSES (label) = 1;
46175 emit_move_insn (operand0, res);
46178 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46179 into OPERAND0. */
46180 void
46181 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46183 /* C code for the stuff we expand below.
46184 double xa = fabs (x), x2;
46185 if (!isless (xa, TWO52))
46186 return x;
46187 xa = xa + TWO52 - TWO52;
46188 x2 = copysign (xa, x);
46189 Compensate. Floor:
46190 if (x2 > x)
46191 x2 -= 1;
46192 Compensate. Ceil:
46193 if (x2 < x)
46194 x2 -= -1;
46195 return x2;
46197 machine_mode mode = GET_MODE (operand0);
46198 rtx xa, TWO52, tmp, one, res, mask;
46199 rtx_code_label *label;
46201 TWO52 = ix86_gen_TWO52 (mode);
46203 /* Temporary for holding the result, initialized to the input
46204 operand to ease control flow. */
46205 res = gen_reg_rtx (mode);
46206 emit_move_insn (res, operand1);
46208 /* xa = abs (operand1) */
46209 xa = ix86_expand_sse_fabs (res, &mask);
46211 /* if (!isless (xa, TWO52)) goto label; */
46212 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46214 /* xa = xa + TWO52 - TWO52; */
46215 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46216 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46218 /* xa = copysign (xa, operand1) */
46219 ix86_sse_copysign_to_positive (xa, xa, res, mask);
46221 /* generate 1.0 or -1.0 */
46222 one = force_reg (mode,
46223 const_double_from_real_value (do_floor
46224 ? dconst1 : dconstm1, mode));
46226 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46227 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46228 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46229 gen_rtx_AND (mode, one, tmp)));
46230 /* We always need to subtract here to preserve signed zero. */
46231 tmp = expand_simple_binop (mode, MINUS,
46232 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46233 emit_move_insn (res, tmp);
46235 emit_label (label);
46236 LABEL_NUSES (label) = 1;
46238 emit_move_insn (operand0, res);
46241 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46242 into OPERAND0. */
46243 void
46244 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46246 /* C code for the stuff we expand below.
46247 double xa = fabs (x), x2;
46248 if (!isless (xa, TWO52))
46249 return x;
46250 x2 = (double)(long)x;
46251 Compensate. Floor:
46252 if (x2 > x)
46253 x2 -= 1;
46254 Compensate. Ceil:
46255 if (x2 < x)
46256 x2 += 1;
46257 if (HONOR_SIGNED_ZEROS (mode))
46258 return copysign (x2, x);
46259 return x2;
46260 */
46261 machine_mode mode = GET_MODE (operand0);
46262 rtx xa, xi, TWO52, tmp, one, res, mask;
46263 rtx_code_label *label;
46265 TWO52 = ix86_gen_TWO52 (mode);
46267 /* Temporary for holding the result, initialized to the input
46268 operand to ease control flow. */
46269 res = gen_reg_rtx (mode);
46270 emit_move_insn (res, operand1);
46272 /* xa = abs (operand1) */
46273 xa = ix86_expand_sse_fabs (res, &mask);
46275 /* if (!isless (xa, TWO52)) goto label; */
46276 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46278 /* xa = (double)(long)x */
46279 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46280 expand_fix (xi, res, 0);
46281 expand_float (xa, xi, 0);
46283 /* generate 1.0 */
46284 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46286 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46287 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46288 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46289 gen_rtx_AND (mode, one, tmp)));
46290 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46291 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46292 emit_move_insn (res, tmp);
46294 if (HONOR_SIGNED_ZEROS (mode))
46295 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46297 emit_label (label);
46298 LABEL_NUSES (label) = 1;
46300 emit_move_insn (operand0, res);
46303 /* Expand SSE sequence for computing round from OPERAND1 storing into
46304 OPERAND0, using a sequence that works without relying on the DImode
46305 truncation via cvttsd2siq, which is only available on 64-bit targets. */
46306 void
46307 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46309 /* C code for the stuff we expand below.
46310 double xa = fabs (x), xa2, x2;
46311 if (!isless (xa, TWO52))
46312 return x;
46313 Using the absolute value and copying the sign back afterwards keeps
46314 -0.0 -> -0.0 correct.
46315 xa2 = xa + TWO52 - TWO52;
46316 Compensate.
46317 dxa = xa2 - xa;
46318 if (dxa <= -0.5)
46319 xa2 += 1;
46320 else if (dxa > 0.5)
46321 xa2 -= 1;
46322 x2 = copysign (xa2, x);
46323 return x2;
46324 */
46325 machine_mode mode = GET_MODE (operand0);
46326 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46327 rtx_code_label *label;
46329 TWO52 = ix86_gen_TWO52 (mode);
46331 /* Temporary for holding the result, initialized to the input
46332 operand to ease control flow. */
46333 res = gen_reg_rtx (mode);
46334 emit_move_insn (res, operand1);
46336 /* xa = abs (operand1) */
46337 xa = ix86_expand_sse_fabs (res, &mask);
46339 /* if (!isless (xa, TWO52)) goto label; */
46340 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46342 /* xa2 = xa + TWO52 - TWO52; */
46343 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46344 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46346 /* dxa = xa2 - xa; */
46347 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46349 /* generate 0.5, 1.0 and -0.5 */
46350 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46351 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46352 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46353 0, OPTAB_DIRECT);
46355 /* Compensate. */
46356 tmp = gen_reg_rtx (mode);
46357 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46358 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46359 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46360 gen_rtx_AND (mode, one, tmp)));
46361 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46362 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46363 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46364 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46365 gen_rtx_AND (mode, one, tmp)));
46366 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46368 /* res = copysign (xa2, operand1) */
46369 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46371 emit_label (label);
46372 LABEL_NUSES (label) = 1;
46374 emit_move_insn (operand0, res);
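/* Illustrative note, not from the GCC sources: xa + 2**52 - 2**52 rounds
   the non-negative xa to the nearest integer with ties going to even,
   whereas round () must round halfway cases away from zero.  The two
   compensations above repair the disagreement.  E.g. for operand1 == 2.5,
   xa2 becomes 2.0 and dxa == -0.5, so the "dxa <= -0.5" test fires and
   xa2 is bumped to 3.0; for operand1 == 3.5, xa2 is already 4.0,
   dxa == 0.5, and neither test fires.  */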
46377 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46378 into OPERAND0. */
46379 void
46380 ix86_expand_trunc (rtx operand0, rtx operand1)
46382 /* C code for SSE variant we expand below.
46383 double xa = fabs (x), x2;
46384 if (!isless (xa, TWO52))
46385 return x;
46386 x2 = (double)(long)x;
46387 if (HONOR_SIGNED_ZEROS (mode))
46388 return copysign (x2, x);
46389 return x2;
46390 */
46391 machine_mode mode = GET_MODE (operand0);
46392 rtx xa, xi, TWO52, res, mask;
46393 rtx_code_label *label;
46395 TWO52 = ix86_gen_TWO52 (mode);
46397 /* Temporary for holding the result, initialized to the input
46398 operand to ease control flow. */
46399 res = gen_reg_rtx (mode);
46400 emit_move_insn (res, operand1);
46402 /* xa = abs (operand1) */
46403 xa = ix86_expand_sse_fabs (res, &mask);
46405 /* if (!isless (xa, TWO52)) goto label; */
46406 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46408 /* x = (double)(long)x */
46409 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46410 expand_fix (xi, res, 0);
46411 expand_float (res, xi, 0);
46413 if (HONOR_SIGNED_ZEROS (mode))
46414 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46416 emit_label (label);
46417 LABEL_NUSES (label) = 1;
46419 emit_move_insn (operand0, res);
46422 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46423 into OPERAND0. */
46424 void
46425 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46427 machine_mode mode = GET_MODE (operand0);
46428 rtx xa, mask, TWO52, one, res, smask, tmp;
46429 rtx_code_label *label;
46431 /* C code for SSE variant we expand below.
46432 double xa = fabs (x), x2;
46433 if (!isless (xa, TWO52))
46434 return x;
46435 xa2 = xa + TWO52 - TWO52;
46436 Compensate:
46437 if (xa2 > xa)
46438 xa2 -= 1.0;
46439 x2 = copysign (xa2, x);
46440 return x2;
46441 */
46443 TWO52 = ix86_gen_TWO52 (mode);
46445 /* Temporary for holding the result, initialized to the input
46446 operand to ease control flow. */
46447 res = gen_reg_rtx (mode);
46448 emit_move_insn (res, operand1);
46450 /* xa = abs (operand1) */
46451 xa = ix86_expand_sse_fabs (res, &smask);
46453 /* if (!isless (xa, TWO52)) goto label; */
46454 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46456 /* res = xa + TWO52 - TWO52; */
46457 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46458 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46459 emit_move_insn (res, tmp);
46461 /* generate 1.0 */
46462 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46464 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46465 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46466 emit_insn (gen_rtx_SET (VOIDmode, mask,
46467 gen_rtx_AND (mode, mask, one)));
46468 tmp = expand_simple_binop (mode, MINUS,
46469 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46470 emit_move_insn (res, tmp);
46472 /* res = copysign (res, operand1) */
46473 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46475 emit_label (label);
46476 LABEL_NUSES (label) = 1;
46478 emit_move_insn (operand0, res);
46481 /* Expand SSE sequence for computing round from OPERAND1 storing
46482 into OPERAND0. */
46483 void
46484 ix86_expand_round (rtx operand0, rtx operand1)
46486 /* C code for the stuff we're doing below:
46487 double xa = fabs (x);
46488 if (!isless (xa, TWO52))
46489 return x;
46490 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46491 return copysign (xa, x);
46492 */
46493 machine_mode mode = GET_MODE (operand0);
46494 rtx res, TWO52, xa, xi, half, mask;
46495 rtx_code_label *label;
46496 const struct real_format *fmt;
46497 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46499 /* Temporary for holding the result, initialized to the input
46500 operand to ease control flow. */
46501 res = gen_reg_rtx (mode);
46502 emit_move_insn (res, operand1);
46504 TWO52 = ix86_gen_TWO52 (mode);
46505 xa = ix86_expand_sse_fabs (res, &mask);
46506 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46508 /* load nextafter (0.5, 0.0) */
46509 fmt = REAL_MODE_FORMAT (mode);
46510 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46511 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46513 /* xa = xa + 0.5 */
46514 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46515 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46517 /* xa = (double)(int64_t)xa */
46518 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46519 expand_fix (xi, xa, 0);
46520 expand_float (xa, xi, 0);
46522 /* res = copysign (xa, operand1) */
46523 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46525 emit_label (label);
46526 LABEL_NUSES (label) = 1;
46528 emit_move_insn (operand0, res);
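/* An illustrative plain-C sketch of the sequence above (not from the GCC
   sources; the helper name, the <math.h> calls and the use of long long
   are assumptions).  The constant loaded above is nextafter (0.5, 0.0),
   the largest double strictly below 0.5, rather than 0.5 itself: with a
   plain 0.5, an input such as 0.49999999999999994 (the largest double
   below 0.5) would sum to a value exactly halfway between two doubles,
   round up to 1.0 and truncate to 1, whereas round () must return 0.  */
#include <math.h>

static double
round_via_trunc (double x)
{
  const double two52 = 4503599627370496.0;	/* 2**52 */
  double xa = fabs (x);

  if (!(xa < two52))		/* already integral (or NaN) */
    return x;

  /* Add just under one half, then truncate via an integer round trip,
     exactly as the expand_fix/expand_float pair above does.  */
  xa = (double) (long long) (xa + nextafter (0.5, 0.0));

  return copysign (xa, x);
}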
46531 /* Expand SSE sequence for computing round
46532 from OP1 storing into OP0 using sse4 round insn. */
46533 void
46534 ix86_expand_round_sse4 (rtx op0, rtx op1)
46536 machine_mode mode = GET_MODE (op0);
46537 rtx e1, e2, res, half;
46538 const struct real_format *fmt;
46539 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46540 rtx (*gen_copysign) (rtx, rtx, rtx);
46541 rtx (*gen_round) (rtx, rtx, rtx);
46543 switch (mode)
46545 case SFmode:
46546 gen_copysign = gen_copysignsf3;
46547 gen_round = gen_sse4_1_roundsf2;
46548 break;
46549 case DFmode:
46550 gen_copysign = gen_copysigndf3;
46551 gen_round = gen_sse4_1_rounddf2;
46552 break;
46553 default:
46554 gcc_unreachable ();
46557 /* round (a) = trunc (a + copysign (0.5, a)) */
46559 /* load nextafter (0.5, 0.0) */
46560 fmt = REAL_MODE_FORMAT (mode);
46561 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46562 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46563 half = const_double_from_real_value (pred_half, mode);
46565 /* e1 = copysign (0.5, op1) */
46566 e1 = gen_reg_rtx (mode);
46567 emit_insn (gen_copysign (e1, half, op1));
46569 /* e2 = op1 + e1 */
46570 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46572 /* res = trunc (e2) */
46573 res = gen_reg_rtx (mode);
46574 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46576 emit_move_insn (op0, res);
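/* An illustrative intrinsics version of the identity used above,
   round (a) = trunc (a + copysign (0.5, a)) with 0.5 replaced by
   nextafter (0.5, 0.0), not from the GCC sources.  The helper name is an
   assumption; the intrinsics are the standard <smmintrin.h>/<math.h>
   interfaces and need -msse4.1.  */
#include <math.h>
#include <smmintrin.h>

static double
round_sse4_sketch (double x)
{
  /* e1 = copysign (pred_half, x), pred_half = nextafter (0.5, 0.0).  */
  double e1 = copysign (nextafter (0.5, 0.0), x);

  /* e2 = x + e1.  */
  __m128d e2 = _mm_set_sd (x + e1);

  /* res = trunc (e2), the ROUND_TRUNC form of the sse4.1 round insn.  */
  __m128d res = _mm_round_sd (e2, e2, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);

  return _mm_cvtsd_f64 (res);
}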
46580 /* Table of valid machine attributes. */
46581 static const struct attribute_spec ix86_attribute_table[] =
46583 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46584 affects_type_identity } */
46585 /* Stdcall attribute says callee is responsible for popping arguments
46586 if they are not variable. */
46587 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46588 true },
46589 /* Fastcall attribute says callee is responsible for popping arguments
46590 if they are not variable. */
46591 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46592 true },
46593 /* Thiscall attribute says callee is responsible for popping arguments
46594 if they are not variable. */
46595 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46596 true },
46597 /* Cdecl attribute says the callee is a normal C declaration */
46598 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46599 true },
46600 /* Regparm attribute specifies how many integer arguments are to be
46601 passed in registers. */
46602 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
46603 true },
46604 /* Sseregparm attribute says we are using x86_64 calling conventions
46605 for FP arguments. */
46606 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46607 true },
46608 /* The transactional memory builtins are implicitly regparm or fastcall
46609 depending on the ABI. Override the generic do-nothing attribute that
46610 these builtins were declared with. */
46611 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46612 true },
46613 /* force_align_arg_pointer says this function realigns the stack at entry. */
46614 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46615 false, true, true, ix86_handle_cconv_attribute, false },
46616 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46617 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46618 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46619 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
46620 false },
46621 #endif
46622 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46623 false },
46624 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46625 false },
46626 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46627 SUBTARGET_ATTRIBUTE_TABLE,
46628 #endif
46629 /* ms_abi and sysv_abi calling convention function attributes. */
46630 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46631 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46632 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46633 false },
46634 { "callee_pop_aggregate_return", 1, 1, false, true, true,
46635 ix86_handle_callee_pop_aggregate_return, true },
46636 /* End element. */
46637 { NULL, 0, 0, false, false, false, NULL, false }
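/* Illustrative user-level declarations (not from the GCC sources) showing
   how a few of the attributes registered above are spelled in target
   code; the identifiers are hypothetical.  */

/* Pass the first two integer arguments in registers (32-bit x86).  */
extern int __attribute__ ((regparm (2))) add2 (int a, int b);

/* Callee pops its own arguments, i.e. the Windows stdcall convention.  */
extern void __attribute__ ((stdcall)) win_callback (int code);

/* Use the Microsoft x86-64 calling convention for this function.  */
extern void __attribute__ ((ms_abi)) ms_entry_point (void);

/* Lay the structure out following the MSVC (ms_struct) rules.  */
struct __attribute__ ((ms_struct)) wire_header { int type : 3; int len : 13; };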
46640 /* Implement targetm.vectorize.builtin_vectorization_cost. */
46641 static int
46642 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46643 tree vectype, int)
46645 unsigned elements;
46647 switch (type_of_cost)
46649 case scalar_stmt:
46650 return ix86_cost->scalar_stmt_cost;
46652 case scalar_load:
46653 return ix86_cost->scalar_load_cost;
46655 case scalar_store:
46656 return ix86_cost->scalar_store_cost;
46658 case vector_stmt:
46659 return ix86_cost->vec_stmt_cost;
46661 case vector_load:
46662 return ix86_cost->vec_align_load_cost;
46664 case vector_store:
46665 return ix86_cost->vec_store_cost;
46667 case vec_to_scalar:
46668 return ix86_cost->vec_to_scalar_cost;
46670 case scalar_to_vec:
46671 return ix86_cost->scalar_to_vec_cost;
46673 case unaligned_load:
46674 case unaligned_store:
46675 return ix86_cost->vec_unalign_load_cost;
46677 case cond_branch_taken:
46678 return ix86_cost->cond_taken_branch_cost;
46680 case cond_branch_not_taken:
46681 return ix86_cost->cond_not_taken_branch_cost;
46683 case vec_perm:
46684 case vec_promote_demote:
46685 return ix86_cost->vec_stmt_cost;
46687 case vec_construct:
46688 elements = TYPE_VECTOR_SUBPARTS (vectype);
46689 return ix86_cost->vec_stmt_cost * (elements / 2 + 1);
46691 default:
46692 gcc_unreachable ();
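/* Illustrative arithmetic, not from the GCC sources: the vec_construct
   case above charges vec_stmt_cost * (elements / 2 + 1), so building a
   V8SF vector from eight scalars on a target whose vec_stmt_cost is 1 is
   costed as 8 / 2 + 1 = 5 -- well above a single vector statement, but
   much cheaper than eight independent scalar statements.  */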
46696 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46697 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46698 insn every time. */
46700 static GTY(()) rtx_insn *vselect_insn;
46702 /* Initialize vselect_insn. */
46704 static void
46705 init_vselect_insn (void)
46707 unsigned i;
46708 rtx x;
46710 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46711 for (i = 0; i < MAX_VECT_LEN; ++i)
46712 XVECEXP (x, 0, i) = const0_rtx;
46713 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46714 const0_rtx), x);
46715 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
46716 start_sequence ();
46717 vselect_insn = emit_insn (x);
46718 end_sequence ();
46721 /* Construct (set target (vec_select op0 (parallel perm))) and
46722 return true if that's a valid instruction in the active ISA. */
46724 static bool
46725 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46726 unsigned nelt, bool testing_p)
46728 unsigned int i;
46729 rtx x, save_vconcat;
46730 int icode;
46732 if (vselect_insn == NULL_RTX)
46733 init_vselect_insn ();
46735 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46736 PUT_NUM_ELEM (XVEC (x, 0), nelt);
46737 for (i = 0; i < nelt; ++i)
46738 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46739 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46740 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46741 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46742 SET_DEST (PATTERN (vselect_insn)) = target;
46743 icode = recog_memoized (vselect_insn);
46745 if (icode >= 0 && !testing_p)
46746 emit_insn (copy_rtx (PATTERN (vselect_insn)));
46748 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46749 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46750 INSN_CODE (vselect_insn) = -1;
46752 return icode >= 0;
46755 /* Similar, but generate a vec_concat from op0 and op1 as well. */
46757 static bool
46758 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46759 const unsigned char *perm, unsigned nelt,
46760 bool testing_p)
46762 machine_mode v2mode;
46763 rtx x;
46764 bool ok;
46766 if (vselect_insn == NULL_RTX)
46767 init_vselect_insn ();
46769 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
46770 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46771 PUT_MODE (x, v2mode);
46772 XEXP (x, 0) = op0;
46773 XEXP (x, 1) = op1;
46774 ok = expand_vselect (target, x, perm, nelt, testing_p);
46775 XEXP (x, 0) = const0_rtx;
46776 XEXP (x, 1) = const0_rtx;
46777 return ok;
46780 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46781 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
46783 static bool
46784 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46786 machine_mode vmode = d->vmode;
46787 unsigned i, mask, nelt = d->nelt;
46788 rtx target, op0, op1, x;
46789 rtx rperm[32], vperm;
46791 if (d->one_operand_p)
46792 return false;
46793 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46794 && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
46796 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46798 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46800 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46802 else
46803 return false;
46805 /* This is a blend, not a permute. Elements must stay in their
46806 respective lanes. */
46807 for (i = 0; i < nelt; ++i)
46809 unsigned e = d->perm[i];
46810 if (!(e == i || e == i + nelt))
46811 return false;
46814 if (d->testing_p)
46815 return true;
46817 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
46818 decision should be extracted elsewhere, so that we only try that
46819 sequence once all budget==3 options have been tried. */
46820 target = d->target;
46821 op0 = d->op0;
46822 op1 = d->op1;
46823 mask = 0;
46825 switch (vmode)
46827 case V8DFmode:
46828 case V16SFmode:
46829 case V4DFmode:
46830 case V8SFmode:
46831 case V2DFmode:
46832 case V4SFmode:
46833 case V8HImode:
46834 case V8SImode:
46835 case V32HImode:
46836 case V64QImode:
46837 case V16SImode:
46838 case V8DImode:
46839 for (i = 0; i < nelt; ++i)
46840 mask |= (d->perm[i] >= nelt) << i;
46841 break;
46843 case V2DImode:
46844 for (i = 0; i < 2; ++i)
46845 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
46846 vmode = V8HImode;
46847 goto do_subreg;
46849 case V4SImode:
46850 for (i = 0; i < 4; ++i)
46851 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46852 vmode = V8HImode;
46853 goto do_subreg;
46855 case V16QImode:
46856 /* See if bytes move in pairs so we can use pblendw with
46857 an immediate argument, rather than pblendvb with a vector
46858 argument. */
46859 for (i = 0; i < 16; i += 2)
46860 if (d->perm[i] + 1 != d->perm[i + 1])
46862 use_pblendvb:
46863 for (i = 0; i < nelt; ++i)
46864 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46866 finish_pblendvb:
46867 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46868 vperm = force_reg (vmode, vperm);
46870 if (GET_MODE_SIZE (vmode) == 16)
46871 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46872 else
46873 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
46874 if (target != d->target)
46875 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46876 return true;
46879 for (i = 0; i < 8; ++i)
46880 mask |= (d->perm[i * 2] >= 16) << i;
46881 vmode = V8HImode;
46882 /* FALLTHRU */
46884 do_subreg:
46885 target = gen_reg_rtx (vmode);
46886 op0 = gen_lowpart (vmode, op0);
46887 op1 = gen_lowpart (vmode, op1);
46888 break;
46890 case V32QImode:
46891 /* See if bytes move in pairs. If not, vpblendvb must be used. */
46892 for (i = 0; i < 32; i += 2)
46893 if (d->perm[i] + 1 != d->perm[i + 1])
46894 goto use_pblendvb;
46895 /* See if bytes move in quadruplets. If yes, vpblendd
46896 with immediate can be used. */
46897 for (i = 0; i < 32; i += 4)
46898 if (d->perm[i] + 2 != d->perm[i + 2])
46899 break;
46900 if (i < 32)
46902 /* See if bytes move the same in both lanes. If yes,
46903 vpblendw with immediate can be used. */
46904 for (i = 0; i < 16; i += 2)
46905 if (d->perm[i] + 16 != d->perm[i + 16])
46906 goto use_pblendvb;
46908 /* Use vpblendw. */
46909 for (i = 0; i < 16; ++i)
46910 mask |= (d->perm[i * 2] >= 32) << i;
46911 vmode = V16HImode;
46912 goto do_subreg;
46915 /* Use vpblendd. */
46916 for (i = 0; i < 8; ++i)
46917 mask |= (d->perm[i * 4] >= 32) << i;
46918 vmode = V8SImode;
46919 goto do_subreg;
46921 case V16HImode:
46922 /* See if words move in pairs. If yes, vpblendd can be used. */
46923 for (i = 0; i < 16; i += 2)
46924 if (d->perm[i] + 1 != d->perm[i + 1])
46925 break;
46926 if (i < 16)
46928 /* See if words move the same in both lanes. If not,
46929 vpblendvb must be used. */
46930 for (i = 0; i < 8; i++)
46931 if (d->perm[i] + 8 != d->perm[i + 8])
46933 /* Use vpblendvb. */
46934 for (i = 0; i < 32; ++i)
46935 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
46937 vmode = V32QImode;
46938 nelt = 32;
46939 target = gen_reg_rtx (vmode);
46940 op0 = gen_lowpart (vmode, op0);
46941 op1 = gen_lowpart (vmode, op1);
46942 goto finish_pblendvb;
46945 /* Use vpblendw. */
46946 for (i = 0; i < 16; ++i)
46947 mask |= (d->perm[i] >= 16) << i;
46948 break;
46951 /* Use vpblendd. */
46952 for (i = 0; i < 8; ++i)
46953 mask |= (d->perm[i * 2] >= 16) << i;
46954 vmode = V8SImode;
46955 goto do_subreg;
46957 case V4DImode:
46958 /* Use vpblendd. */
46959 for (i = 0; i < 4; ++i)
46960 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46961 vmode = V8SImode;
46962 goto do_subreg;
46964 default:
46965 gcc_unreachable ();
46968 /* This matches five different patterns with the different modes. */
46969 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
46970 x = gen_rtx_SET (VOIDmode, target, x);
46971 emit_insn (x);
46972 if (target != d->target)
46973 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46975 return true;
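/* Illustrative example, not from the GCC sources: for a V4SFmode
   permutation { 0, 5, 2, 7 } every element stays in its lane (element i
   is taken from slot i of either operand), so the loop above builds
   mask = (1 << 1) | (1 << 3) = 0xa and a single blendps with that
   immediate implements the whole permutation.  */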
46978 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46979 in terms of the variable form of vpermilps.
46981 Note that we will have already failed the immediate input vpermilps,
46982 which requires that the high and low part shuffle be identical; the
46983 variable form doesn't require that. */
46985 static bool
46986 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
46988 rtx rperm[8], vperm;
46989 unsigned i;
46991 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
46992 return false;
46994 /* We can only permute within the 128-bit lane. */
46995 for (i = 0; i < 8; ++i)
46997 unsigned e = d->perm[i];
46998 if (i < 4 ? e >= 4 : e < 4)
46999 return false;
47002 if (d->testing_p)
47003 return true;
47005 for (i = 0; i < 8; ++i)
47007 unsigned e = d->perm[i];
47009 /* Within each 128-bit lane, the elements of op0 are numbered
47010 from 0 and the elements of op1 are numbered from 4. */
47011 if (e >= 8 + 4)
47012 e -= 8;
47013 else if (e >= 4)
47014 e -= 4;
47016 rperm[i] = GEN_INT (e);
47019 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
47020 vperm = force_reg (V8SImode, vperm);
47021 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
47023 return true;
47026 /* Return true if permutation D can be performed as VMODE permutation
47027 instead. */
47029 static bool
47030 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
47032 unsigned int i, j, chunk;
47034 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
47035 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
47036 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
47037 return false;
47039 if (GET_MODE_NUNITS (vmode) >= d->nelt)
47040 return true;
47042 chunk = d->nelt / GET_MODE_NUNITS (vmode);
47043 for (i = 0; i < d->nelt; i += chunk)
47044 if (d->perm[i] & (chunk - 1))
47045 return false;
47046 else
47047 for (j = 1; j < chunk; ++j)
47048 if (d->perm[i] + j != d->perm[i + j])
47049 return false;
47051 return true;
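/* Illustrative example, not from the GCC sources: for d->vmode ==
   V16QImode and vmode == V4SImode, chunk == 4, so only byte permutations
   that move aligned groups of four consecutive bytes are accepted.
   { 4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11 } passes (it is the V4SImode
   permutation { 1 0 3 2 }), while { 1 2 3 4 ... } is rejected because
   d->perm[0] & 3 is nonzero.  */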
47054 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47055 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
47057 static bool
47058 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
47060 unsigned i, nelt, eltsz, mask;
47061 unsigned char perm[64];
47062 machine_mode vmode = V16QImode;
47063 rtx rperm[64], vperm, target, op0, op1;
47065 nelt = d->nelt;
47067 if (!d->one_operand_p)
47069 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
47071 if (TARGET_AVX2
47072 && valid_perm_using_mode_p (V2TImode, d))
47074 if (d->testing_p)
47075 return true;
47077 /* Use vperm2i128 insn. The pattern uses
47078 V4DImode instead of V2TImode. */
47079 target = d->target;
47080 if (d->vmode != V4DImode)
47081 target = gen_reg_rtx (V4DImode);
47082 op0 = gen_lowpart (V4DImode, d->op0);
47083 op1 = gen_lowpart (V4DImode, d->op1);
47084 rperm[0]
47085 = GEN_INT ((d->perm[0] / (nelt / 2))
47086 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
47087 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47088 if (target != d->target)
47089 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47090 return true;
47092 return false;
47095 else
47097 if (GET_MODE_SIZE (d->vmode) == 16)
47099 if (!TARGET_SSSE3)
47100 return false;
47102 else if (GET_MODE_SIZE (d->vmode) == 32)
47104 if (!TARGET_AVX2)
47105 return false;
47107 /* V4DImode should be already handled through
47108 expand_vselect by vpermq instruction. */
47109 gcc_assert (d->vmode != V4DImode);
47111 vmode = V32QImode;
47112 if (d->vmode == V8SImode
47113 || d->vmode == V16HImode
47114 || d->vmode == V32QImode)
47116 /* First see if vpermq can be used for
47117 V8SImode/V16HImode/V32QImode. */
47118 if (valid_perm_using_mode_p (V4DImode, d))
47120 for (i = 0; i < 4; i++)
47121 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47122 if (d->testing_p)
47123 return true;
47124 target = gen_reg_rtx (V4DImode);
47125 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47126 perm, 4, false))
47128 emit_move_insn (d->target,
47129 gen_lowpart (d->vmode, target));
47130 return true;
47132 return false;
47135 /* Next see if vpermd can be used. */
47136 if (valid_perm_using_mode_p (V8SImode, d))
47137 vmode = V8SImode;
47139 /* Or if vpermps can be used. */
47140 else if (d->vmode == V8SFmode)
47141 vmode = V8SImode;
47143 if (vmode == V32QImode)
47145 /* vpshufb only works within 128-bit lanes; it is not
47146 possible to shuffle bytes between the lanes. */
47147 for (i = 0; i < nelt; ++i)
47148 if ((d->perm[i] ^ i) & (nelt / 2))
47149 return false;
47152 else if (GET_MODE_SIZE (d->vmode) == 64)
47154 if (!TARGET_AVX512BW)
47155 return false;
47157 /* If vpermq didn't work, vpshufb won't work either. */
47158 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47159 return false;
47161 vmode = V64QImode;
47162 if (d->vmode == V16SImode
47163 || d->vmode == V32HImode
47164 || d->vmode == V64QImode)
47166 /* First see if vpermq can be used for
47167 V16SImode/V32HImode/V64QImode. */
47168 if (valid_perm_using_mode_p (V8DImode, d))
47170 for (i = 0; i < 8; i++)
47171 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47172 if (d->testing_p)
47173 return true;
47174 target = gen_reg_rtx (V8DImode);
47175 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47176 perm, 8, false))
47178 emit_move_insn (d->target,
47179 gen_lowpart (d->vmode, target));
47180 return true;
47182 return false;
47185 /* Next see if vpermd can be used. */
47186 if (valid_perm_using_mode_p (V16SImode, d))
47187 vmode = V16SImode;
47189 /* Or if vpermps can be used. */
47190 else if (d->vmode == V16SFmode)
47191 vmode = V16SImode;
47192 if (vmode == V64QImode)
47194 /* vpshufb only works within 128-bit lanes; it is not
47195 possible to shuffle bytes between the lanes. */
47196 for (i = 0; i < nelt; ++i)
47197 if ((d->perm[i] ^ i) & (nelt / 4))
47198 return false;
47201 else
47202 return false;
47205 if (d->testing_p)
47206 return true;
47208 if (vmode == V8SImode)
47209 for (i = 0; i < 8; ++i)
47210 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47211 else if (vmode == V16SImode)
47212 for (i = 0; i < 16; ++i)
47213 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47214 else
47216 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47217 if (!d->one_operand_p)
47218 mask = 2 * nelt - 1;
47219 else if (vmode == V16QImode)
47220 mask = nelt - 1;
47221 else if (vmode == V64QImode)
47222 mask = nelt / 4 - 1;
47223 else
47224 mask = nelt / 2 - 1;
47226 for (i = 0; i < nelt; ++i)
47228 unsigned j, e = d->perm[i] & mask;
47229 for (j = 0; j < eltsz; ++j)
47230 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47234 vperm = gen_rtx_CONST_VECTOR (vmode,
47235 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47236 vperm = force_reg (vmode, vperm);
47238 target = d->target;
47239 if (d->vmode != vmode)
47240 target = gen_reg_rtx (vmode);
47241 op0 = gen_lowpart (vmode, d->op0);
47242 if (d->one_operand_p)
47244 if (vmode == V16QImode)
47245 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47246 else if (vmode == V32QImode)
47247 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47248 else if (vmode == V64QImode)
47249 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47250 else if (vmode == V8SFmode)
47251 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47252 else if (vmode == V8SImode)
47253 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47254 else if (vmode == V16SFmode)
47255 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47256 else if (vmode == V16SImode)
47257 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47258 else
47259 gcc_unreachable ();
47261 else
47263 op1 = gen_lowpart (vmode, d->op1);
47264 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47266 if (target != d->target)
47267 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47269 return true;
47272 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47273 in a single instruction. */
47275 static bool
47276 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47278 unsigned i, nelt = d->nelt;
47279 unsigned char perm2[MAX_VECT_LEN];
47281 /* Check plain VEC_SELECT first, because AVX has instructions that could
47282 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47283 input where SEL+CONCAT may not. */
47284 if (d->one_operand_p)
47286 int mask = nelt - 1;
47287 bool identity_perm = true;
47288 bool broadcast_perm = true;
47290 for (i = 0; i < nelt; i++)
47292 perm2[i] = d->perm[i] & mask;
47293 if (perm2[i] != i)
47294 identity_perm = false;
47295 if (perm2[i])
47296 broadcast_perm = false;
47299 if (identity_perm)
47301 if (!d->testing_p)
47302 emit_move_insn (d->target, d->op0);
47303 return true;
47305 else if (broadcast_perm && TARGET_AVX2)
47307 /* Use vpbroadcast{b,w,d}. */
47308 rtx (*gen) (rtx, rtx) = NULL;
47309 switch (d->vmode)
47311 case V64QImode:
47312 if (TARGET_AVX512BW)
47313 gen = gen_avx512bw_vec_dupv64qi_1;
47314 break;
47315 case V32QImode:
47316 gen = gen_avx2_pbroadcastv32qi_1;
47317 break;
47318 case V32HImode:
47319 if (TARGET_AVX512BW)
47320 gen = gen_avx512bw_vec_dupv32hi_1;
47321 break;
47322 case V16HImode:
47323 gen = gen_avx2_pbroadcastv16hi_1;
47324 break;
47325 case V16SImode:
47326 if (TARGET_AVX512F)
47327 gen = gen_avx512f_vec_dupv16si_1;
47328 break;
47329 case V8SImode:
47330 gen = gen_avx2_pbroadcastv8si_1;
47331 break;
47332 case V16QImode:
47333 gen = gen_avx2_pbroadcastv16qi;
47334 break;
47335 case V8HImode:
47336 gen = gen_avx2_pbroadcastv8hi;
47337 break;
47338 case V16SFmode:
47339 if (TARGET_AVX512F)
47340 gen = gen_avx512f_vec_dupv16sf_1;
47341 break;
47342 case V8SFmode:
47343 gen = gen_avx2_vec_dupv8sf_1;
47344 break;
47345 case V8DFmode:
47346 if (TARGET_AVX512F)
47347 gen = gen_avx512f_vec_dupv8df_1;
47348 break;
47349 case V8DImode:
47350 if (TARGET_AVX512F)
47351 gen = gen_avx512f_vec_dupv8di_1;
47352 break;
47353 /* For other modes prefer other shuffles this function creates. */
47354 default: break;
47356 if (gen != NULL)
47358 if (!d->testing_p)
47359 emit_insn (gen (d->target, d->op0));
47360 return true;
47364 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47365 return true;
47367 /* There are plenty of patterns in sse.md that are written for
47368 SEL+CONCAT and are not replicated for a single op. Perhaps
47369 that should be changed, to avoid the nastiness here. */
47371 /* Recognize interleave style patterns, which means incrementing
47372 every other permutation operand. */
47373 for (i = 0; i < nelt; i += 2)
47375 perm2[i] = d->perm[i] & mask;
47376 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47378 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47379 d->testing_p))
47380 return true;
47382 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47383 if (nelt >= 4)
47385 for (i = 0; i < nelt; i += 4)
47387 perm2[i + 0] = d->perm[i + 0] & mask;
47388 perm2[i + 1] = d->perm[i + 1] & mask;
47389 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47390 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47393 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47394 d->testing_p))
47395 return true;
47399 /* Finally, try the fully general two operand permute. */
47400 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47401 d->testing_p))
47402 return true;
47404 /* Recognize interleave style patterns with reversed operands. */
47405 if (!d->one_operand_p)
47407 for (i = 0; i < nelt; ++i)
47409 unsigned e = d->perm[i];
47410 if (e >= nelt)
47411 e -= nelt;
47412 else
47413 e += nelt;
47414 perm2[i] = e;
47417 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47418 d->testing_p))
47419 return true;
47422 /* Try the SSE4.1 blend variable merge instructions. */
47423 if (expand_vec_perm_blend (d))
47424 return true;
47426 /* Try one of the AVX vpermil variable permutations. */
47427 if (expand_vec_perm_vpermil (d))
47428 return true;
47430 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47431 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47432 if (expand_vec_perm_pshufb (d))
47433 return true;
47435 /* Try the AVX2 vpalignr instruction. */
47436 if (expand_vec_perm_palignr (d, true))
47437 return true;
47439 /* Try the AVX512F vpermi2 instructions. */
47440 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47441 return true;
47443 return false;
47446 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47447 in terms of a pair of pshuflw + pshufhw instructions. */
47449 static bool
47450 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47452 unsigned char perm2[MAX_VECT_LEN];
47453 unsigned i;
47454 bool ok;
47456 if (d->vmode != V8HImode || !d->one_operand_p)
47457 return false;
47459 /* The two permutations only operate in 64-bit lanes. */
47460 for (i = 0; i < 4; ++i)
47461 if (d->perm[i] >= 4)
47462 return false;
47463 for (i = 4; i < 8; ++i)
47464 if (d->perm[i] < 4)
47465 return false;
47467 if (d->testing_p)
47468 return true;
47470 /* Emit the pshuflw. */
47471 memcpy (perm2, d->perm, 4);
47472 for (i = 4; i < 8; ++i)
47473 perm2[i] = i;
47474 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47475 gcc_assert (ok);
47477 /* Emit the pshufhw. */
47478 memcpy (perm2 + 4, d->perm + 4, 4);
47479 for (i = 0; i < 4; ++i)
47480 perm2[i] = i;
47481 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47482 gcc_assert (ok);
47484 return true;
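/* Illustrative example, not from the GCC sources: the one-operand
   V8HImode permutation { 2 0 3 1 5 7 4 6 } keeps the first four indices
   below 4 and the last four at or above 4, so it is split into pshuflw
   with { 2 0 3 1 4 5 6 7 } followed by pshufhw with { 0 1 2 3 5 7 4 6 }.  */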
47487 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47488 the permutation using the SSSE3 palignr instruction. This succeeds
47489 when all of the elements in PERM fit within one vector and we merely
47490 need to shift them down so that a single vector permutation has a
47491 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
47492 the vpalignr instruction by itself can perform the requested permutation. */
47494 static bool
47495 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47497 unsigned i, nelt = d->nelt;
47498 unsigned min, max, minswap, maxswap;
47499 bool in_order, ok, swap = false;
47500 rtx shift, target;
47501 struct expand_vec_perm_d dcopy;
47503 /* Even with AVX, palignr only operates on 128-bit vectors;
47504 with AVX2, palignr operates within each of the two 128-bit lanes. */
47505 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47506 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47507 return false;
47509 min = 2 * nelt;
47510 max = 0;
47511 minswap = 2 * nelt;
47512 maxswap = 0;
47513 for (i = 0; i < nelt; ++i)
47515 unsigned e = d->perm[i];
47516 unsigned eswap = d->perm[i] ^ nelt;
47517 if (GET_MODE_SIZE (d->vmode) == 32)
47519 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47520 eswap = e ^ (nelt / 2);
47522 if (e < min)
47523 min = e;
47524 if (e > max)
47525 max = e;
47526 if (eswap < minswap)
47527 minswap = eswap;
47528 if (eswap > maxswap)
47529 maxswap = eswap;
47531 if (min == 0
47532 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47534 if (d->one_operand_p
47535 || minswap == 0
47536 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47537 ? nelt / 2 : nelt))
47538 return false;
47539 swap = true;
47540 min = minswap;
47541 max = maxswap;
47544 /* Given that we have SSSE3, we know we'll be able to implement the
47545 single operand permutation after the palignr with pshufb for
47546 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47547 first. */
47548 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47549 return true;
47551 dcopy = *d;
47552 if (swap)
47554 dcopy.op0 = d->op1;
47555 dcopy.op1 = d->op0;
47556 for (i = 0; i < nelt; ++i)
47557 dcopy.perm[i] ^= nelt;
47560 in_order = true;
47561 for (i = 0; i < nelt; ++i)
47563 unsigned e = dcopy.perm[i];
47564 if (GET_MODE_SIZE (d->vmode) == 32
47565 && e >= nelt
47566 && (e & (nelt / 2 - 1)) < min)
47567 e = e - min - (nelt / 2);
47568 else
47569 e = e - min;
47570 if (e != i)
47571 in_order = false;
47572 dcopy.perm[i] = e;
47574 dcopy.one_operand_p = true;
47576 if (single_insn_only_p && !in_order)
47577 return false;
47579 /* For AVX2, test whether we can permute the result in one instruction. */
47580 if (d->testing_p)
47582 if (in_order)
47583 return true;
47584 dcopy.op1 = dcopy.op0;
47585 return expand_vec_perm_1 (&dcopy);
47588 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47589 if (GET_MODE_SIZE (d->vmode) == 16)
47591 target = gen_reg_rtx (TImode);
47592 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47593 gen_lowpart (TImode, dcopy.op0), shift));
47595 else
47597 target = gen_reg_rtx (V2TImode);
47598 emit_insn (gen_avx2_palignrv2ti (target,
47599 gen_lowpart (V2TImode, dcopy.op1),
47600 gen_lowpart (V2TImode, dcopy.op0),
47601 shift));
47604 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47606 /* Test for the degenerate case where the alignment by itself
47607 produces the desired permutation. */
47608 if (in_order)
47610 emit_move_insn (d->target, dcopy.op0);
47611 return true;
47614 ok = expand_vec_perm_1 (&dcopy);
47615 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47617 return ok;
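/* Illustrative example, not from the GCC sources: for a V16QImode
   permutation selecting elements { 5 6 7 8 9 10 11 12 13 14 15 16 17 18
   19 20 } (elements 0-15 from op0, 16-31 from op1), min == 5, so a
   single palignr by 5 bytes already leaves the result in order
   (in_order is true) and no follow-up pshufb is needed.  */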
47620 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47621 the permutation using the SSE4_1 pblendv instruction. Potentially
47622 reduces a permutation from two pshufb insns plus an or to one pshufb plus a pblendv. */
47624 static bool
47625 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47627 unsigned i, which, nelt = d->nelt;
47628 struct expand_vec_perm_d dcopy, dcopy1;
47629 machine_mode vmode = d->vmode;
47630 bool ok;
47632 /* Use the same checks as in expand_vec_perm_blend. */
47633 if (d->one_operand_p)
47634 return false;
47635 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47637 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47639 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47641 else
47642 return false;
47644 /* Figure out which permutation elements do not stay in their
47645 respective lanes. */
47646 for (i = 0, which = 0; i < nelt; ++i)
47648 unsigned e = d->perm[i];
47649 if (e != i)
47650 which |= (e < nelt ? 1 : 2);
47652 /* We can pblend the part where elements do not stay in their
47653 respective lanes only when these elements all come from one
47654 half of the permutation.
47655 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their respective
47656 lanes, but both 8 and 9 are >= 8.
47657 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their
47658 respective lanes, and 8 is >= 8 but 2 is not. */
47659 if (which != 1 && which != 2)
47660 return false;
47661 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47662 return true;
47664 /* First we apply one operand permutation to the part where
47665 elements stay not in their respective lanes. */
47666 dcopy = *d;
47667 if (which == 2)
47668 dcopy.op0 = dcopy.op1 = d->op1;
47669 else
47670 dcopy.op0 = dcopy.op1 = d->op0;
47671 if (!d->testing_p)
47672 dcopy.target = gen_reg_rtx (vmode);
47673 dcopy.one_operand_p = true;
47675 for (i = 0; i < nelt; ++i)
47676 dcopy.perm[i] = d->perm[i] & (nelt - 1);
47678 ok = expand_vec_perm_1 (&dcopy);
47679 if (GET_MODE_SIZE (vmode) != 16 && !ok)
47680 return false;
47681 else
47682 gcc_assert (ok);
47683 if (d->testing_p)
47684 return true;
47686 /* Next we put permuted elements into their positions. */
47687 dcopy1 = *d;
47688 if (which == 2)
47689 dcopy1.op1 = dcopy.target;
47690 else
47691 dcopy1.op0 = dcopy.target;
47693 for (i = 0; i < nelt; ++i)
47694 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47696 ok = expand_vec_perm_blend (&dcopy1);
47697 gcc_assert (ok);
47699 return true;
47702 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47704 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47705 a two vector permutation into a single vector permutation by using
47706 an interleave operation to merge the vectors. */
47708 static bool
47709 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47711 struct expand_vec_perm_d dremap, dfinal;
47712 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47713 unsigned HOST_WIDE_INT contents;
47714 unsigned char remap[2 * MAX_VECT_LEN];
47715 rtx_insn *seq;
47716 bool ok, same_halves = false;
47718 if (GET_MODE_SIZE (d->vmode) == 16)
47720 if (d->one_operand_p)
47721 return false;
47723 else if (GET_MODE_SIZE (d->vmode) == 32)
47725 if (!TARGET_AVX)
47726 return false;
47727 /* For 32-byte modes allow even d->one_operand_p.
47728 The lack of cross-lane shuffling in some instructions
47729 might prevent a single insn shuffle. */
47730 dfinal = *d;
47731 dfinal.testing_p = true;
47732 /* If expand_vec_perm_interleave3 can expand this into
47733 a 3 insn sequence, give up and let it be expanded as
47734 a 3 insn sequence instead. While that is one insn longer,
47735 it doesn't need a memory operand, and in the common
47736 case where both the interleave low and interleave high
47737 permutations with the same operands are adjacent, the
47738 two together need only 4 insns after CSE. */
47739 if (expand_vec_perm_interleave3 (&dfinal))
47740 return false;
47742 else
47743 return false;
47745 /* Examine from whence the elements come. */
47746 contents = 0;
47747 for (i = 0; i < nelt; ++i)
47748 contents |= HOST_WIDE_INT_1U << d->perm[i];
47750 memset (remap, 0xff, sizeof (remap));
47751 dremap = *d;
47753 if (GET_MODE_SIZE (d->vmode) == 16)
47755 unsigned HOST_WIDE_INT h1, h2, h3, h4;
47757 /* Split the two input vectors into 4 halves. */
47758 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
47759 h2 = h1 << nelt2;
47760 h3 = h2 << nelt2;
47761 h4 = h3 << nelt2;
47763 /* If the elements come from the low halves, use interleave low, and
47764 similarly interleave high for the high halves. If the elements are
47765 from mis-matched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
47766 if ((contents & (h1 | h3)) == contents)
47768 /* punpckl* */
47769 for (i = 0; i < nelt2; ++i)
47771 remap[i] = i * 2;
47772 remap[i + nelt] = i * 2 + 1;
47773 dremap.perm[i * 2] = i;
47774 dremap.perm[i * 2 + 1] = i + nelt;
47776 if (!TARGET_SSE2 && d->vmode == V4SImode)
47777 dremap.vmode = V4SFmode;
47779 else if ((contents & (h2 | h4)) == contents)
47781 /* punpckh* */
47782 for (i = 0; i < nelt2; ++i)
47784 remap[i + nelt2] = i * 2;
47785 remap[i + nelt + nelt2] = i * 2 + 1;
47786 dremap.perm[i * 2] = i + nelt2;
47787 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47789 if (!TARGET_SSE2 && d->vmode == V4SImode)
47790 dremap.vmode = V4SFmode;
47792 else if ((contents & (h1 | h4)) == contents)
47794 /* shufps */
47795 for (i = 0; i < nelt2; ++i)
47797 remap[i] = i;
47798 remap[i + nelt + nelt2] = i + nelt2;
47799 dremap.perm[i] = i;
47800 dremap.perm[i + nelt2] = i + nelt + nelt2;
47802 if (nelt != 4)
47804 /* shufpd */
47805 dremap.vmode = V2DImode;
47806 dremap.nelt = 2;
47807 dremap.perm[0] = 0;
47808 dremap.perm[1] = 3;
47811 else if ((contents & (h2 | h3)) == contents)
47813 /* shufps */
47814 for (i = 0; i < nelt2; ++i)
47816 remap[i + nelt2] = i;
47817 remap[i + nelt] = i + nelt2;
47818 dremap.perm[i] = i + nelt2;
47819 dremap.perm[i + nelt2] = i + nelt;
47821 if (nelt != 4)
47823 /* shufpd */
47824 dremap.vmode = V2DImode;
47825 dremap.nelt = 2;
47826 dremap.perm[0] = 1;
47827 dremap.perm[1] = 2;
47830 else
47831 return false;
47833 else
47835 unsigned int nelt4 = nelt / 4, nzcnt = 0;
47836 unsigned HOST_WIDE_INT q[8];
47837 unsigned int nonzero_halves[4];
47839 /* Split the two input vectors into 8 quarters. */
47840 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
47841 for (i = 1; i < 8; ++i)
47842 q[i] = q[0] << (nelt4 * i);
47843 for (i = 0; i < 4; ++i)
47844 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47846 nonzero_halves[nzcnt] = i;
47847 ++nzcnt;
47850 if (nzcnt == 1)
47852 gcc_assert (d->one_operand_p);
47853 nonzero_halves[1] = nonzero_halves[0];
47854 same_halves = true;
47856 else if (d->one_operand_p)
47858 gcc_assert (nonzero_halves[0] == 0);
47859 gcc_assert (nonzero_halves[1] == 1);
47862 if (nzcnt <= 2)
47864 if (d->perm[0] / nelt2 == nonzero_halves[1])
47866 /* Attempt to increase the likelihood that dfinal
47867 shuffle will be intra-lane. */
47868 char tmph = nonzero_halves[0];
47869 nonzero_halves[0] = nonzero_halves[1];
47870 nonzero_halves[1] = tmph;
47873 /* vperm2f128 or vperm2i128. */
47874 for (i = 0; i < nelt2; ++i)
47876 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47877 remap[i + nonzero_halves[0] * nelt2] = i;
47878 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
47879 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
47882 if (d->vmode != V8SFmode
47883 && d->vmode != V4DFmode
47884 && d->vmode != V8SImode)
47886 dremap.vmode = V8SImode;
47887 dremap.nelt = 8;
47888 for (i = 0; i < 4; ++i)
47890 dremap.perm[i] = i + nonzero_halves[0] * 4;
47891 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
47895 else if (d->one_operand_p)
47896 return false;
47897 else if (TARGET_AVX2
47898 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
47900 /* vpunpckl* */
47901 for (i = 0; i < nelt4; ++i)
47903 remap[i] = i * 2;
47904 remap[i + nelt] = i * 2 + 1;
47905 remap[i + nelt2] = i * 2 + nelt2;
47906 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
47907 dremap.perm[i * 2] = i;
47908 dremap.perm[i * 2 + 1] = i + nelt;
47909 dremap.perm[i * 2 + nelt2] = i + nelt2;
47910 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
47913 else if (TARGET_AVX2
47914 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
47916 /* vpunpckh* */
47917 for (i = 0; i < nelt4; ++i)
47919 remap[i + nelt4] = i * 2;
47920 remap[i + nelt + nelt4] = i * 2 + 1;
47921 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
47922 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
47923 dremap.perm[i * 2] = i + nelt4;
47924 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
47925 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
47926 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
47929 else
47930 return false;
47933 /* Use the remapping array set up above to move the elements from their
47934 swizzled locations into their final destinations. */
47935 dfinal = *d;
47936 for (i = 0; i < nelt; ++i)
47938 unsigned e = remap[d->perm[i]];
47939 gcc_assert (e < nelt);
47940 /* If same_halves is true, both halves of the remapped vector are the
47941 same. Avoid cross-lane accesses if possible. */
47942 if (same_halves && i >= nelt2)
47944 gcc_assert (e < nelt2);
47945 dfinal.perm[i] = e + nelt2;
47947 else
47948 dfinal.perm[i] = e;
47950 if (!d->testing_p)
47952 dremap.target = gen_reg_rtx (dremap.vmode);
47953 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47955 dfinal.op1 = dfinal.op0;
47956 dfinal.one_operand_p = true;
47958 /* Test if the final remap can be done with a single insn. For V4SFmode or
47959 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
47960 start_sequence ();
47961 ok = expand_vec_perm_1 (&dfinal);
47962 seq = get_insns ();
47963 end_sequence ();
47965 if (!ok)
47966 return false;
47968 if (d->testing_p)
47969 return true;
47971 if (dremap.vmode != dfinal.vmode)
47973 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
47974 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
47977 ok = expand_vec_perm_1 (&dremap);
47978 gcc_assert (ok);
47980 emit_insn (seq);
47981 return true;
47984 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47985 a single vector cross-lane permutation into vpermq followed
47986 by any of the single insn permutations. */
47988 static bool
47989 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
47991 struct expand_vec_perm_d dremap, dfinal;
47992 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
47993 unsigned contents[2];
47994 bool ok;
47996 if (!(TARGET_AVX2
47997 && (d->vmode == V32QImode || d->vmode == V16HImode)
47998 && d->one_operand_p))
47999 return false;
48001 contents[0] = 0;
48002 contents[1] = 0;
48003 for (i = 0; i < nelt2; ++i)
48005 contents[0] |= 1u << (d->perm[i] / nelt4);
48006 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
48009 for (i = 0; i < 2; ++i)
48011 unsigned int cnt = 0;
48012 for (j = 0; j < 4; ++j)
48013 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
48014 return false;
48017 if (d->testing_p)
48018 return true;
48020 dremap = *d;
48021 dremap.vmode = V4DImode;
48022 dremap.nelt = 4;
48023 dremap.target = gen_reg_rtx (V4DImode);
48024 dremap.op0 = gen_lowpart (V4DImode, d->op0);
48025 dremap.op1 = dremap.op0;
48026 dremap.one_operand_p = true;
48027 for (i = 0; i < 2; ++i)
48029 unsigned int cnt = 0;
48030 for (j = 0; j < 4; ++j)
48031 if ((contents[i] & (1u << j)) != 0)
48032 dremap.perm[2 * i + cnt++] = j;
48033 for (; cnt < 2; ++cnt)
48034 dremap.perm[2 * i + cnt] = 0;
48037 dfinal = *d;
48038 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48039 dfinal.op1 = dfinal.op0;
48040 dfinal.one_operand_p = true;
48041 for (i = 0, j = 0; i < nelt; ++i)
48043 if (i == nelt2)
48044 j = 2;
48045 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
48046 if ((d->perm[i] / nelt4) == dremap.perm[j])
48048 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
48049 dfinal.perm[i] |= nelt4;
48050 else
48051 gcc_unreachable ();
48054 ok = expand_vec_perm_1 (&dremap);
48055 gcc_assert (ok);
48057 ok = expand_vec_perm_1 (&dfinal);
48058 gcc_assert (ok);
48060 return true;
48063 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
48064 a vector permutation using two instructions, vperm2f128 resp.
48065 vperm2i128 followed by any single in-lane permutation. */
48067 static bool
48068 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
48070 struct expand_vec_perm_d dfirst, dsecond;
48071 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
48072 bool ok;
48074 if (!TARGET_AVX
48075 || GET_MODE_SIZE (d->vmode) != 32
48076 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
48077 return false;
48079 dsecond = *d;
48080 dsecond.one_operand_p = false;
48081 dsecond.testing_p = true;
48083 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
48084 immediate. For perm < 16 the second permutation uses
48085 d->op0 as first operand, for perm >= 16 it uses d->op1
48086 as first operand. The second operand is the result of
48087 vperm2[fi]128. */
48088 for (perm = 0; perm < 32; perm++)
48090 /* Ignore permutations which do not move anything cross-lane. */
48091 if (perm < 16)
48093 /* The second shuffle for e.g. V4DFmode has
48094 0123 and ABCD operands.
48095 Ignore AB23, as 23 is already in the second lane
48096 of the first operand. */
48097 if ((perm & 0xc) == (1 << 2)) continue;
48098 /* And 01CD, as 01 is in the first lane of the first
48099 operand. */
48100 if ((perm & 3) == 0) continue;
48101 /* And 4567, as then the vperm2[fi]128 doesn't change
48102 anything on the original 4567 second operand. */
48103 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48105 else
48107 /* The second shuffle for e.g. V4DFmode has
48108 4567 and ABCD operands.
48109 Ignore AB67, as 67 is already in the second lane
48110 of the first operand. */
48111 if ((perm & 0xc) == (3 << 2)) continue;
48112 /* And 45CD, as 45 is in the first lane of the first
48113 operand. */
48114 if ((perm & 3) == 2) continue;
48115 /* And 0123, as then the vperm2[fi]128 doesn't change
48116 anything on the original 0123 first operand. */
48117 if ((perm & 0xf) == (1 << 2)) continue;
48120 for (i = 0; i < nelt; i++)
48122 j = d->perm[i] / nelt2;
48123 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48124 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48125 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48126 dsecond.perm[i] = d->perm[i] & (nelt - 1);
48127 else
48128 break;
48131 if (i == nelt)
48133 start_sequence ();
48134 ok = expand_vec_perm_1 (&dsecond);
48135 end_sequence ();
48137 else
48138 ok = false;
48140 if (ok)
48142 if (d->testing_p)
48143 return true;
48145 /* Found a usable second shuffle. dfirst will be
48146 vperm2f128 on d->op0 and d->op1. */
48147 dsecond.testing_p = false;
48148 dfirst = *d;
48149 dfirst.target = gen_reg_rtx (d->vmode);
48150 for (i = 0; i < nelt; i++)
48151 dfirst.perm[i] = (i & (nelt2 - 1))
48152 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48154 canonicalize_perm (&dfirst);
48155 ok = expand_vec_perm_1 (&dfirst);
48156 gcc_assert (ok);
48158 /* And dsecond is some single insn shuffle, taking
48159 d->op0 and result of vperm2f128 (if perm < 16) or
48160 d->op1 and result of vperm2f128 (otherwise). */
48161 if (perm >= 16)
48162 dsecond.op0 = dsecond.op1;
48163 dsecond.op1 = dfirst.target;
48165 ok = expand_vec_perm_1 (&dsecond);
48166 gcc_assert (ok);
48168 return true;
48171 /* For one operand, the only useful vperm2f128 permutation is 0x01
48172 aka lanes swap. */
48173 if (d->one_operand_p)
48174 return false;
48177 return false;
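/* Illustrative example, not from the GCC sources: for V4DFmode and
   d->perm == { 2, 3, 4, 5 }, the low result lane wants the high lane of
   d->op0 (code 1) and the high result lane wants the low lane of d->op1
   (code 2), so perm == (2 << 2) | 1 == 9 and the vperm2f128 immediate is
   ((9 << 2) | 9) & 0x33 == 0x21.  */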
48180 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48181 a two vector permutation using 2 intra-lane interleave insns
48182 and cross-lane shuffle for 32-byte vectors. */
48184 static bool
48185 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48187 unsigned i, nelt;
48188 rtx (*gen) (rtx, rtx, rtx);
48190 if (d->one_operand_p)
48191 return false;
48192 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48194 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
48196 else
48197 return false;
48199 nelt = d->nelt;
48200 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48201 return false;
48202 for (i = 0; i < nelt; i += 2)
48203 if (d->perm[i] != d->perm[0] + i / 2
48204 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
48205 return false;
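/* At this point the permutation has the interleave shape
   { p, p + nelt, p + 1, p + 1 + nelt, ... } with p either 0 or
   nelt / 2; e.g. for V8SImode and p == 0 the accepted selector is
   { 0, 8, 1, 9, 2, 10, 3, 11 }, i.e. vec_interleave_low. */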
48207 if (d->testing_p)
48208 return true;
48210 switch (d->vmode)
48212 case V32QImode:
48213 if (d->perm[0])
48214 gen = gen_vec_interleave_highv32qi;
48215 else
48216 gen = gen_vec_interleave_lowv32qi;
48217 break;
48218 case V16HImode:
48219 if (d->perm[0])
48220 gen = gen_vec_interleave_highv16hi;
48221 else
48222 gen = gen_vec_interleave_lowv16hi;
48223 break;
48224 case V8SImode:
48225 if (d->perm[0])
48226 gen = gen_vec_interleave_highv8si;
48227 else
48228 gen = gen_vec_interleave_lowv8si;
48229 break;
48230 case V4DImode:
48231 if (d->perm[0])
48232 gen = gen_vec_interleave_highv4di;
48233 else
48234 gen = gen_vec_interleave_lowv4di;
48235 break;
48236 case V8SFmode:
48237 if (d->perm[0])
48238 gen = gen_vec_interleave_highv8sf;
48239 else
48240 gen = gen_vec_interleave_lowv8sf;
48241 break;
48242 case V4DFmode:
48243 if (d->perm[0])
48244 gen = gen_vec_interleave_highv4df;
48245 else
48246 gen = gen_vec_interleave_lowv4df;
48247 break;
48248 default:
48249 gcc_unreachable ();
48252 emit_insn (gen (d->target, d->op0, d->op1));
48253 return true;
48256 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
48257 a single vector permutation using a single intra-lane vector
48258 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48259 the non-swapped and swapped vectors together. */
48261 static bool
48262 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48264 struct expand_vec_perm_d dfirst, dsecond;
48265 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48266 rtx_insn *seq;
48267 bool ok;
48268 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48270 if (!TARGET_AVX
48271 || TARGET_AVX2
48272 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48273 || !d->one_operand_p)
48274 return false;
48276 dfirst = *d;
48277 for (i = 0; i < nelt; i++)
48278 dfirst.perm[i] = 0xff;
48279 for (i = 0, msk = 0; i < nelt; i++)
48281 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48282 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48283 return false;
48284 dfirst.perm[j] = d->perm[i];
48285 if (j != i)
48286 msk |= (1 << i);
48288 for (i = 0; i < nelt; i++)
48289 if (dfirst.perm[i] == 0xff)
48290 dfirst.perm[i] = i;
48292 if (!d->testing_p)
48293 dfirst.target = gen_reg_rtx (dfirst.vmode);
48295 start_sequence ();
48296 ok = expand_vec_perm_1 (&dfirst);
48297 seq = get_insns ();
48298 end_sequence ();
48300 if (!ok)
48301 return false;
48303 if (d->testing_p)
48304 return true;
48306 emit_insn (seq);
48308 dsecond = *d;
48309 dsecond.op0 = dfirst.target;
48310 dsecond.op1 = dfirst.target;
48311 dsecond.one_operand_p = true;
48312 dsecond.target = gen_reg_rtx (dsecond.vmode);
48313 for (i = 0; i < nelt; i++)
48314 dsecond.perm[i] = i ^ nelt2;
48316 ok = expand_vec_perm_1 (&dsecond);
48317 gcc_assert (ok);
48319 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48320 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48321 return true;
48324 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
48325 permutation using two vperm2f128, followed by a vshufpd insn blending
48326 the two vectors together. */
48328 static bool
48329 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48331 struct expand_vec_perm_d dfirst, dsecond, dthird;
48332 bool ok;
48334 if (!TARGET_AVX || (d->vmode != V4DFmode))
48335 return false;
48337 if (d->testing_p)
48338 return true;
48340 dfirst = *d;
48341 dsecond = *d;
48342 dthird = *d;
48344 dfirst.perm[0] = (d->perm[0] & ~1);
48345 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48346 dfirst.perm[2] = (d->perm[2] & ~1);
48347 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48348 dsecond.perm[0] = (d->perm[1] & ~1);
48349 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48350 dsecond.perm[2] = (d->perm[3] & ~1);
48351 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48352 dthird.perm[0] = (d->perm[0] % 2);
48353 dthird.perm[1] = (d->perm[1] % 2) + 4;
48354 dthird.perm[2] = (d->perm[2] % 2) + 2;
48355 dthird.perm[3] = (d->perm[3] % 2) + 6;
48357 dfirst.target = gen_reg_rtx (dfirst.vmode);
48358 dsecond.target = gen_reg_rtx (dsecond.vmode);
48359 dthird.op0 = dfirst.target;
48360 dthird.op1 = dsecond.target;
48361 dthird.one_operand_p = false;
48363 canonicalize_perm (&dfirst);
48364 canonicalize_perm (&dsecond);
48366 ok = expand_vec_perm_1 (&dfirst)
48367 && expand_vec_perm_1 (&dsecond)
48368 && expand_vec_perm_1 (&dthird);
48370 gcc_assert (ok);
48372 return true;
48375 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48376 permutation with two pshufb insns and an ior. We should have already
48377 failed all two instruction sequences. */
48379 static bool
48380 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48382 rtx rperm[2][16], vperm, l, h, op, m128;
48383 unsigned int i, nelt, eltsz;
48385 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48386 return false;
48387 gcc_assert (!d->one_operand_p);
48389 if (d->testing_p)
48390 return true;
48392 nelt = d->nelt;
48393 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48395 /* Generate two permutation masks. If the required element is within
48396 the given vector it is shuffled into the proper lane. If the required
48397 element is in the other vector, force a zero into the lane by setting
48398 bit 7 in the permutation mask. */
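/* E.g. for V16QImode with d->perm[0] == 20: element 20 lives in
   d->op1, so the mask for op1 gets byte index 4 at position 0 while
   the mask for op0 gets -128 there, zeroing that lane; the final ior
   below merges the two partial results. */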
48399 m128 = GEN_INT (-128);
48400 for (i = 0; i < nelt; ++i)
48402 unsigned j, e = d->perm[i];
48403 unsigned which = (e >= nelt);
48404 if (e >= nelt)
48405 e -= nelt;
48407 for (j = 0; j < eltsz; ++j)
48409 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48410 rperm[1-which][i*eltsz + j] = m128;
48414 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48415 vperm = force_reg (V16QImode, vperm);
48417 l = gen_reg_rtx (V16QImode);
48418 op = gen_lowpart (V16QImode, d->op0);
48419 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48421 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48422 vperm = force_reg (V16QImode, vperm);
48424 h = gen_reg_rtx (V16QImode);
48425 op = gen_lowpart (V16QImode, d->op1);
48426 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48428 op = d->target;
48429 if (d->vmode != V16QImode)
48430 op = gen_reg_rtx (V16QImode);
48431 emit_insn (gen_iorv16qi3 (op, l, h));
48432 if (op != d->target)
48433 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48435 return true;
48438 /* Implement arbitrary permutation of one V32QImode or V16QImode operand
48439 with two vpshufb insns, vpermq and vpor. We should have already failed
48440 all two or three instruction sequences. */
48442 static bool
48443 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48445 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48446 unsigned int i, nelt, eltsz;
48448 if (!TARGET_AVX2
48449 || !d->one_operand_p
48450 || (d->vmode != V32QImode && d->vmode != V16HImode))
48451 return false;
48453 if (d->testing_p)
48454 return true;
48456 nelt = d->nelt;
48457 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48459 /* Generate two permutation masks. If the required element is within
48460 the same lane, it is shuffled in. If the required element is from the
48461 other lane, force a zero by setting bit 7 in the permutation mask.
48462 In the other mask the elements are non-negative if the element
48463 is requested from the other lane, but it is also moved to the other lane,
48464 so that the result of vpshufb can have the two V2TImode halves
48465 swapped. */
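/* E.g. for V32QImode with d->perm[0] == 20: the requested byte sits
   in the other 128-bit lane, so the cross-lane mask places byte index
   4 at position 16 (rperm[1][16] == 4) while the same-lane mask zeroes
   position 0; the vpermq below then swaps the V2TImode halves of that
   vpshufb result, bringing byte 20 to position 0. */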
48466 m128 = GEN_INT (-128);
48467 for (i = 0; i < nelt; ++i)
48469 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48470 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48472 for (j = 0; j < eltsz; ++j)
48474 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48475 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48479 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48480 vperm = force_reg (V32QImode, vperm);
48482 h = gen_reg_rtx (V32QImode);
48483 op = gen_lowpart (V32QImode, d->op0);
48484 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48486 /* Swap the 128-bit lanes of h into hp. */
48487 hp = gen_reg_rtx (V4DImode);
48488 op = gen_lowpart (V4DImode, h);
48489 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48490 const1_rtx));
48492 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48493 vperm = force_reg (V32QImode, vperm);
48495 l = gen_reg_rtx (V32QImode);
48496 op = gen_lowpart (V32QImode, d->op0);
48497 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48499 op = d->target;
48500 if (d->vmode != V32QImode)
48501 op = gen_reg_rtx (V32QImode);
48502 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48503 if (op != d->target)
48504 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48506 return true;
48509 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48510 and extract-odd permutations of two V32QImode or V16QImode operands
48511 with two vpshufb insns, vpor and vpermq. We should have already
48512 failed all two or three instruction sequences. */
48514 static bool
48515 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48517 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48518 unsigned int i, nelt, eltsz;
48520 if (!TARGET_AVX2
48521 || d->one_operand_p
48522 || (d->vmode != V32QImode && d->vmode != V16HImode))
48523 return false;
48525 for (i = 0; i < d->nelt; ++i)
48526 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48527 return false;
48529 if (d->testing_p)
48530 return true;
48532 nelt = d->nelt;
48533 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48535 /* Generate two permutation masks. In the first permutation mask
48536 the first quarter will contain indexes for the first half
48537 of the op0, the second quarter will contain bit 7 set, third quarter
48538 will contain indexes for the second half of the op0 and the
48539 last quarter bit 7 set. In the second permutation mask
48540 the first quarter will contain bit 7 set, the second quarter
48541 indexes for the first half of the op1, the third quarter bit 7 set
48542 and last quarter indexes for the second half of the op1.
48543 I.e. the first mask e.g. for V32QImode extract even will be:
48544 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48545 (all values masked with 0xf except for -128) and second mask
48546 for extract even will be
48547 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48548 m128 = GEN_INT (-128);
48549 for (i = 0; i < nelt; ++i)
48551 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48552 unsigned which = d->perm[i] >= nelt;
48553 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48555 for (j = 0; j < eltsz; ++j)
48557 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48558 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48562 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48563 vperm = force_reg (V32QImode, vperm);
48565 l = gen_reg_rtx (V32QImode);
48566 op = gen_lowpart (V32QImode, d->op0);
48567 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48569 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48570 vperm = force_reg (V32QImode, vperm);
48572 h = gen_reg_rtx (V32QImode);
48573 op = gen_lowpart (V32QImode, d->op1);
48574 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48576 ior = gen_reg_rtx (V32QImode);
48577 emit_insn (gen_iorv32qi3 (ior, l, h));
48579 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48580 op = gen_reg_rtx (V4DImode);
48581 ior = gen_lowpart (V4DImode, ior);
48582 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48583 const1_rtx, GEN_INT (3)));
48584 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48586 return true;
48589 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48590 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48591 with two "and" and "pack" or two "shift" and "pack" insns. We should
48592 have already failed all two instruction sequences. */
48594 static bool
48595 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48597 rtx op, dop0, dop1, t, rperm[16];
48598 unsigned i, odd, c, s, nelt = d->nelt;
48599 bool end_perm = false;
48600 machine_mode half_mode;
48601 rtx (*gen_and) (rtx, rtx, rtx);
48602 rtx (*gen_pack) (rtx, rtx, rtx);
48603 rtx (*gen_shift) (rtx, rtx, rtx);
48605 if (d->one_operand_p)
48606 return false;
48608 switch (d->vmode)
48610 case V8HImode:
48611 /* Required for "pack". */
48612 if (!TARGET_SSE4_1)
48613 return false;
48614 c = 0xffff;
48615 s = 16;
48616 half_mode = V4SImode;
48617 gen_and = gen_andv4si3;
48618 gen_pack = gen_sse4_1_packusdw;
48619 gen_shift = gen_lshrv4si3;
48620 break;
48621 case V16QImode:
48622 /* No check as all instructions are SSE2. */
48623 c = 0xff;
48624 s = 8;
48625 half_mode = V8HImode;
48626 gen_and = gen_andv8hi3;
48627 gen_pack = gen_sse2_packuswb;
48628 gen_shift = gen_lshrv8hi3;
48629 break;
48630 case V16HImode:
48631 if (!TARGET_AVX2)
48632 return false;
48633 c = 0xffff;
48634 s = 16;
48635 half_mode = V8SImode;
48636 gen_and = gen_andv8si3;
48637 gen_pack = gen_avx2_packusdw;
48638 gen_shift = gen_lshrv8si3;
48639 end_perm = true;
48640 break;
48641 case V32QImode:
48642 if (!TARGET_AVX2)
48643 return false;
48644 c = 0xff;
48645 s = 8;
48646 half_mode = V16HImode;
48647 gen_and = gen_andv16hi3;
48648 gen_pack = gen_avx2_packuswb;
48649 gen_shift = gen_lshrv16hi3;
48650 end_perm = true;
48651 break;
48652 default:
48653 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
48654 general shuffles. */
48655 return false;
48658 /* Check that permutation is even or odd. */
48659 odd = d->perm[0];
48660 if (odd > 1)
48661 return false;
48663 for (i = 1; i < nelt; ++i)
48664 if (d->perm[i] != 2 * i + odd)
48665 return false;
48667 if (d->testing_p)
48668 return true;
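/* For the even case each wide element is masked down to its low half
   (e.g. 0x00ff per V8HImode word when d->vmode is V16QImode) so that
   the unsigned pack keeps exactly the even narrow elements; for the
   odd case a logical right shift by the element width moves the odd
   elements down first, and the pack then keeps those instead. */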
48670 dop0 = gen_reg_rtx (half_mode);
48671 dop1 = gen_reg_rtx (half_mode);
48672 if (odd == 0)
48674 for (i = 0; i < nelt / 2; i++)
48675 rperm[i] = GEN_INT (c);
48676 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
48677 t = force_reg (half_mode, t);
48678 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
48679 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
48681 else
48683 emit_insn (gen_shift (dop0,
48684 gen_lowpart (half_mode, d->op0),
48685 GEN_INT (s)));
48686 emit_insn (gen_shift (dop1,
48687 gen_lowpart (half_mode, d->op1),
48688 GEN_INT (s)));
48690 /* In AVX2 for 256 bit case we need to permute pack result. */
48691 if (TARGET_AVX2 && end_perm)
48693 op = gen_reg_rtx (d->vmode);
48694 t = gen_reg_rtx (V4DImode);
48695 emit_insn (gen_pack (op, dop0, dop1));
48696 emit_insn (gen_avx2_permv4di_1 (t,
48697 gen_lowpart (V4DImode, op),
48698 const0_rtx,
48699 const2_rtx,
48700 const1_rtx,
48701 GEN_INT (3)));
48702 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
48704 else
48705 emit_insn (gen_pack (d->target, dop0, dop1));
48707 return true;
48710 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
48711 and extract-odd permutations. */
48713 static bool
48714 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48716 rtx t1, t2, t3, t4, t5;
48718 switch (d->vmode)
48720 case V4DFmode:
48721 if (d->testing_p)
48722 break;
48723 t1 = gen_reg_rtx (V4DFmode);
48724 t2 = gen_reg_rtx (V4DFmode);
48726 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48727 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48728 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48730 /* Now an unpck[lh]pd will produce the result required. */
48731 if (odd)
48732 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48733 else
48734 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
48735 emit_insn (t3);
48736 break;
48738 case V8SFmode:
48740 int mask = odd ? 0xdd : 0x88;
48742 if (d->testing_p)
48743 break;
48744 t1 = gen_reg_rtx (V8SFmode);
48745 t2 = gen_reg_rtx (V8SFmode);
48746 t3 = gen_reg_rtx (V8SFmode);
48748 /* Shuffle within the 128-bit lanes to produce:
48749 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
48750 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48751 GEN_INT (mask)));
48753 /* Shuffle the lanes around to produce:
48754 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
48755 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48756 GEN_INT (0x3)));
48758 /* Shuffle within the 128-bit lanes to produce:
48759 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
48760 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48762 /* Shuffle within the 128-bit lanes to produce:
48763 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
48764 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48766 /* Shuffle the lanes around to produce:
48767 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
48768 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
48769 GEN_INT (0x20)));
48771 break;
48773 case V2DFmode:
48774 case V4SFmode:
48775 case V2DImode:
48776 case V4SImode:
48777 /* These are always directly implementable by expand_vec_perm_1. */
48778 gcc_unreachable ();
48780 case V8HImode:
48781 if (TARGET_SSE4_1)
48782 return expand_vec_perm_even_odd_pack (d);
48783 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48784 return expand_vec_perm_pshufb2 (d);
48785 else
48787 if (d->testing_p)
48788 break;
48789 /* We need 2*log2(N)-1 operations to achieve odd/even
48790 with interleave. */
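/* Sketch for operands A0..A7 and B0..B7: after the first four
   interleaves the working vector (d->target) holds
   { A0 A4 B0 B4 A1 A5 B1 B5 } and t2 holds
   { A2 A6 B2 B6 A3 A7 B3 B7 }, so a final low (resp. high)
   interleave yields the even { A0 A2 A4 A6 B0 B2 B4 B6 }
   (resp. odd) elements. */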
48791 t1 = gen_reg_rtx (V8HImode);
48792 t2 = gen_reg_rtx (V8HImode);
48793 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48794 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48795 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48796 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48797 if (odd)
48798 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48799 else
48800 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
48801 emit_insn (t3);
48803 break;
48805 case V16QImode:
48806 return expand_vec_perm_even_odd_pack (d);
48808 case V16HImode:
48809 case V32QImode:
48810 return expand_vec_perm_even_odd_pack (d);
48812 case V4DImode:
48813 if (!TARGET_AVX2)
48815 struct expand_vec_perm_d d_copy = *d;
48816 d_copy.vmode = V4DFmode;
48817 if (d->testing_p)
48818 d_copy.target = gen_lowpart (V4DFmode, d->target);
48819 else
48820 d_copy.target = gen_reg_rtx (V4DFmode);
48821 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48822 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48823 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48825 if (!d->testing_p)
48826 emit_move_insn (d->target,
48827 gen_lowpart (V4DImode, d_copy.target));
48828 return true;
48830 return false;
48833 if (d->testing_p)
48834 break;
48836 t1 = gen_reg_rtx (V4DImode);
48837 t2 = gen_reg_rtx (V4DImode);
48839 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48840 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48841 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48843 /* Now a vpunpck[lh]qdq will produce the result required. */
48844 if (odd)
48845 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48846 else
48847 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
48848 emit_insn (t3);
48849 break;
48851 case V8SImode:
48852 if (!TARGET_AVX2)
48854 struct expand_vec_perm_d d_copy = *d;
48855 d_copy.vmode = V8SFmode;
48856 if (d->testing_p)
48857 d_copy.target = gen_lowpart (V8SFmode, d->target);
48858 else
48859 d_copy.target = gen_reg_rtx (V8SFmode);
48860 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48861 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48862 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48864 if (!d->testing_p)
48865 emit_move_insn (d->target,
48866 gen_lowpart (V8SImode, d_copy.target));
48867 return true;
48869 return false;
48872 if (d->testing_p)
48873 break;
48875 t1 = gen_reg_rtx (V8SImode);
48876 t2 = gen_reg_rtx (V8SImode);
48877 t3 = gen_reg_rtx (V4DImode);
48878 t4 = gen_reg_rtx (V4DImode);
48879 t5 = gen_reg_rtx (V4DImode);
48881 /* Shuffle the lanes around into
48882 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
48883 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
48884 gen_lowpart (V4DImode, d->op1),
48885 GEN_INT (0x20)));
48886 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
48887 gen_lowpart (V4DImode, d->op1),
48888 GEN_INT (0x31)));
48890 /* Swap the 2nd and 3rd position in each lane into
48891 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
48892 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
48893 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48894 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
48895 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48897 /* Now a vpunpck[lh]qdq will produce
48898 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
48899 if (odd)
48900 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
48901 gen_lowpart (V4DImode, t2));
48902 else
48903 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
48904 gen_lowpart (V4DImode, t2));
48905 emit_insn (t3);
48906 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
48907 break;
48909 default:
48910 gcc_unreachable ();
48913 return true;
48916 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
48917 extract-even and extract-odd permutations. */
48919 static bool
48920 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
48922 unsigned i, odd, nelt = d->nelt;
48924 odd = d->perm[0];
48925 if (odd != 0 && odd != 1)
48926 return false;
48928 for (i = 1; i < nelt; ++i)
48929 if (d->perm[i] != 2 * i + odd)
48930 return false;
48932 return expand_vec_perm_even_odd_1 (d, odd);
48935 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
48936 permutations. We assume that expand_vec_perm_1 has already failed. */
48938 static bool
48939 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
48941 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
48942 machine_mode vmode = d->vmode;
48943 unsigned char perm2[4];
48944 rtx op0 = d->op0, dest;
48945 bool ok;
48947 switch (vmode)
48949 case V4DFmode:
48950 case V8SFmode:
48951 /* These are special-cased in sse.md so that we can optionally
48952 use the vbroadcast instruction. They expand to two insns
48953 if the input happens to be in a register. */
48954 gcc_unreachable ();
48956 case V2DFmode:
48957 case V2DImode:
48958 case V4SFmode:
48959 case V4SImode:
48960 /* These are always implementable using standard shuffle patterns. */
48961 gcc_unreachable ();
48963 case V8HImode:
48964 case V16QImode:
48965 /* These can be implemented via interleave. We save one insn by
48966 stopping once we have promoted to V4SImode and then use pshufd. */
48967 if (d->testing_p)
48968 return true;
48971 rtx dest;
48972 rtx (*gen) (rtx, rtx, rtx)
48973 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
48974 : gen_vec_interleave_lowv8hi;
48976 if (elt >= nelt2)
48978 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
48979 : gen_vec_interleave_highv8hi;
48980 elt -= nelt2;
48982 nelt2 /= 2;
48984 dest = gen_reg_rtx (vmode);
48985 emit_insn (gen (dest, op0, op0));
48986 vmode = get_mode_wider_vector (vmode);
48987 op0 = gen_lowpart (vmode, dest);
48989 while (vmode != V4SImode);
48991 memset (perm2, elt, 4);
48992 dest = gen_reg_rtx (V4SImode);
48993 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
48994 gcc_assert (ok);
48995 if (!d->testing_p)
48996 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
48997 return true;
48999 case V64QImode:
49000 case V32QImode:
49001 case V16HImode:
49002 case V8SImode:
49003 case V4DImode:
49004 /* For AVX2 broadcasts of the first element vpbroadcast* or
49005 vpermq should be used by expand_vec_perm_1. */
49006 gcc_assert (!TARGET_AVX2 || d->perm[0]);
49007 return false;
49009 default:
49010 gcc_unreachable ();
49014 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
49015 broadcast permutations. */
49017 static bool
49018 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
49020 unsigned i, elt, nelt = d->nelt;
49022 if (!d->one_operand_p)
49023 return false;
49025 elt = d->perm[0];
49026 for (i = 1; i < nelt; ++i)
49027 if (d->perm[i] != elt)
49028 return false;
49030 return expand_vec_perm_broadcast_1 (d);
49033 /* Implement arbitrary permutations of two V64QImode operands
49034 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
49035 static bool
49036 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
49038 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
49039 return false;
49041 if (d->testing_p)
49042 return true;
49044 struct expand_vec_perm_d ds[2];
49045 rtx rperm[128], vperm, target0, target1;
49046 unsigned int i, nelt;
49047 machine_mode vmode;
49049 nelt = d->nelt;
49050 vmode = V64QImode;
49052 for (i = 0; i < 2; i++)
49054 ds[i] = *d;
49055 ds[i].vmode = V32HImode;
49056 ds[i].nelt = 32;
49057 ds[i].target = gen_reg_rtx (V32HImode);
49058 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
49059 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
49062 /* Prepare permutations such that the first one takes care of
49063 putting the even bytes into the right positions or one position
49064 higher (ds[0]) and the second one takes care of
49065 putting the odd bytes into the right positions or one position
49066 lower (ds[1]). */
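/* E.g. if d->perm[5] == 17, then ds[1].perm[2] == 8, i.e. the word
   permutation brings the word holding bytes 16/17 to word position 2
   (bytes 4/5), and the second pshufb mask then selects byte
   (5 & 14) + (17 & 1) == 5, the odd byte of that word. */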
49068 for (i = 0; i < nelt; i++)
49070 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
49071 if (i & 1)
49073 rperm[i] = constm1_rtx;
49074 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49076 else
49078 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49079 rperm[i + 64] = constm1_rtx;
49083 bool ok = expand_vec_perm_1 (&ds[0]);
49084 gcc_assert (ok);
49085 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49087 ok = expand_vec_perm_1 (&ds[1]);
49088 gcc_assert (ok);
49089 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
49091 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49092 vperm = force_reg (vmode, vperm);
49093 target0 = gen_reg_rtx (V64QImode);
49094 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49096 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49097 vperm = force_reg (vmode, vperm);
49098 target1 = gen_reg_rtx (V64QImode);
49099 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
49101 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
49102 return true;
49105 /* Implement arbitrary permutation of two V32QImode or V16QImode operands
49106 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
49107 all the shorter instruction sequences. */
49109 static bool
49110 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49112 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49113 unsigned int i, nelt, eltsz;
49114 bool used[4];
49116 if (!TARGET_AVX2
49117 || d->one_operand_p
49118 || (d->vmode != V32QImode && d->vmode != V16HImode))
49119 return false;
49121 if (d->testing_p)
49122 return true;
49124 nelt = d->nelt;
49125 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
49127 /* Generate 4 permutation masks. If the required element is within
49128 the same lane, it is shuffled in. If the required element is from the
49129 other lane, force a zero by setting bit 7 in the permutation mask.
49130 In the other mask the elements are non-negative if the element
49131 is requested from the other lane, but it is also moved to the other lane,
49132 so that the result of vpshufb can have the two V2TImode halves
49133 swapped. */
49134 m128 = GEN_INT (-128);
49135 for (i = 0; i < 32; ++i)
49137 rperm[0][i] = m128;
49138 rperm[1][i] = m128;
49139 rperm[2][i] = m128;
49140 rperm[3][i] = m128;
49142 used[0] = false;
49143 used[1] = false;
49144 used[2] = false;
49145 used[3] = false;
49146 for (i = 0; i < nelt; ++i)
49148 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49149 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49150 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49152 for (j = 0; j < eltsz; ++j)
49153 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49154 used[which] = true;
49157 for (i = 0; i < 2; ++i)
49159 if (!used[2 * i + 1])
49161 h[i] = NULL_RTX;
49162 continue;
49164 vperm = gen_rtx_CONST_VECTOR (V32QImode,
49165 gen_rtvec_v (32, rperm[2 * i + 1]));
49166 vperm = force_reg (V32QImode, vperm);
49167 h[i] = gen_reg_rtx (V32QImode);
49168 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49169 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
49172 /* Swap the 128-bit lanes of h[X]. */
49173 for (i = 0; i < 2; ++i)
49175 if (h[i] == NULL_RTX)
49176 continue;
49177 op = gen_reg_rtx (V4DImode);
49178 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49179 const2_rtx, GEN_INT (3), const0_rtx,
49180 const1_rtx));
49181 h[i] = gen_lowpart (V32QImode, op);
49184 for (i = 0; i < 2; ++i)
49186 if (!used[2 * i])
49188 l[i] = NULL_RTX;
49189 continue;
49191 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49192 vperm = force_reg (V32QImode, vperm);
49193 l[i] = gen_reg_rtx (V32QImode);
49194 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49195 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
49198 for (i = 0; i < 2; ++i)
49200 if (h[i] && l[i])
49202 op = gen_reg_rtx (V32QImode);
49203 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49204 l[i] = op;
49206 else if (h[i])
49207 l[i] = h[i];
49210 gcc_assert (l[0] && l[1]);
49211 op = d->target;
49212 if (d->vmode != V32QImode)
49213 op = gen_reg_rtx (V32QImode);
49214 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49215 if (op != d->target)
49216 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49217 return true;
49220 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49221 With all of the interface bits taken care of, perform the expansion
49222 in D and return true on success. */
49224 static bool
49225 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49227 /* Try a single instruction expansion. */
49228 if (expand_vec_perm_1 (d))
49229 return true;
49231 /* Try sequences of two instructions. */
49233 if (expand_vec_perm_pshuflw_pshufhw (d))
49234 return true;
49236 if (expand_vec_perm_palignr (d, false))
49237 return true;
49239 if (expand_vec_perm_interleave2 (d))
49240 return true;
49242 if (expand_vec_perm_broadcast (d))
49243 return true;
49245 if (expand_vec_perm_vpermq_perm_1 (d))
49246 return true;
49248 if (expand_vec_perm_vperm2f128 (d))
49249 return true;
49251 if (expand_vec_perm_pblendv (d))
49252 return true;
49254 /* Try sequences of three instructions. */
49256 if (expand_vec_perm_even_odd_pack (d))
49257 return true;
49259 if (expand_vec_perm_2vperm2f128_vshuf (d))
49260 return true;
49262 if (expand_vec_perm_pshufb2 (d))
49263 return true;
49265 if (expand_vec_perm_interleave3 (d))
49266 return true;
49268 if (expand_vec_perm_vperm2f128_vblend (d))
49269 return true;
49271 /* Try sequences of four instructions. */
49273 if (expand_vec_perm_vpshufb2_vpermq (d))
49274 return true;
49276 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49277 return true;
49279 if (expand_vec_perm_vpermi2_vpshub2 (d))
49280 return true;
49282 /* ??? Look for narrow permutations whose element orderings would
49283 allow the promotion to a wider mode. */
49285 /* ??? Look for sequences of interleave or a wider permute that place
49286 the data into the correct lanes for a half-vector shuffle like
49287 pshuf[lh]w or vpermilps. */
49289 /* ??? Look for sequences of interleave that produce the desired results.
49290 The combinatorics of punpck[lh] get pretty ugly... */
49292 if (expand_vec_perm_even_odd (d))
49293 return true;
49295 /* Even longer sequences. */
49296 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49297 return true;
49299 return false;
49302 /* If a permutation only uses one operand, make it clear. Returns true
49303 if the permutation references both operands. */
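/* For instance, with nelt == 4 a selector { 4, 6, 5, 7 } references
   only the second operand (which == 2); it is folded to { 0, 2, 1, 3 }
   with op0 replaced by op1, so the rest of the matching only has to
   handle a single input vector. */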
49305 static bool
49306 canonicalize_perm (struct expand_vec_perm_d *d)
49308 int i, which, nelt = d->nelt;
49310 for (i = which = 0; i < nelt; ++i)
49311 which |= (d->perm[i] < nelt ? 1 : 2);
49313 d->one_operand_p = true;
49314 switch (which)
49316 default:
49317 gcc_unreachable();
49319 case 3:
49320 if (!rtx_equal_p (d->op0, d->op1))
49322 d->one_operand_p = false;
49323 break;
49325 /* The elements of PERM do not suggest that only the first operand
49326 is used, but both operands are identical. Allow easier matching
49327 of the permutation by folding the permutation into the single
49328 input vector. */
49329 /* FALLTHRU */
49331 case 2:
49332 for (i = 0; i < nelt; ++i)
49333 d->perm[i] &= nelt - 1;
49334 d->op0 = d->op1;
49335 break;
49337 case 1:
49338 d->op1 = d->op0;
49339 break;
49342 return (which == 3);
49345 bool
49346 ix86_expand_vec_perm_const (rtx operands[4])
49348 struct expand_vec_perm_d d;
49349 unsigned char perm[MAX_VECT_LEN];
49350 int i, nelt;
49351 bool two_args;
49352 rtx sel;
49354 d.target = operands[0];
49355 d.op0 = operands[1];
49356 d.op1 = operands[2];
49357 sel = operands[3];
49359 d.vmode = GET_MODE (d.target);
49360 gcc_assert (VECTOR_MODE_P (d.vmode));
49361 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49362 d.testing_p = false;
49364 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49365 gcc_assert (XVECLEN (sel, 0) == nelt);
49366 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
49368 for (i = 0; i < nelt; ++i)
49370 rtx e = XVECEXP (sel, 0, i);
49371 int ei = INTVAL (e) & (2 * nelt - 1);
49372 d.perm[i] = ei;
49373 perm[i] = ei;
49376 two_args = canonicalize_perm (&d);
49378 if (ix86_expand_vec_perm_const_1 (&d))
49379 return true;
49381 /* If the selector says both arguments are needed, but the operands are the
49382 same, the above tried to expand with one_operand_p and flattened selector.
49383 If that didn't work, retry without one_operand_p; we succeeded with that
49384 during testing. */
49385 if (two_args && d.one_operand_p)
49387 d.one_operand_p = false;
49388 memcpy (d.perm, perm, sizeof (perm));
49389 return ix86_expand_vec_perm_const_1 (&d);
49392 return false;
49395 /* Implement targetm.vectorize.vec_perm_const_ok. */
49397 static bool
49398 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49399 const unsigned char *sel)
49401 struct expand_vec_perm_d d;
49402 unsigned int i, nelt, which;
49403 bool ret;
49405 d.vmode = vmode;
49406 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49407 d.testing_p = true;
49409 /* Given sufficient ISA support we can just return true here
49410 for selected vector modes. */
49411 switch (d.vmode)
49413 case V16SFmode:
49414 case V16SImode:
49415 case V8DImode:
49416 case V8DFmode:
49417 if (TARGET_AVX512F)
49418 /* All implementable with a single vpermi2 insn. */
49419 return true;
49420 break;
49421 case V32HImode:
49422 if (TARGET_AVX512BW)
49423 /* All implementable with a single vpermi2 insn. */
49424 return true;
49425 break;
49426 case V64QImode:
49427 if (TARGET_AVX512BW)
49428 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
49429 return true;
49430 break;
49431 case V8SImode:
49432 case V8SFmode:
49433 case V4DFmode:
49434 case V4DImode:
49435 if (TARGET_AVX512VL)
49436 /* All implementable with a single vpermi2 insn. */
49437 return true;
49438 break;
49439 case V16HImode:
49440 if (TARGET_AVX2)
49441 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49442 return true;
49443 break;
49444 case V32QImode:
49445 if (TARGET_AVX2)
49446 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49447 return true;
49448 break;
49449 case V4SImode:
49450 case V4SFmode:
49451 case V8HImode:
49452 case V16QImode:
49453 /* All implementable with a single vpperm insn. */
49454 if (TARGET_XOP)
49455 return true;
49456 /* All implementable with 2 pshufb + 1 ior. */
49457 if (TARGET_SSSE3)
49458 return true;
49459 break;
49460 case V2DImode:
49461 case V2DFmode:
49462 /* All implementable with shufpd or unpck[lh]pd. */
49463 return true;
49464 default:
49465 return false;
49468 /* Extract the values from the vector CST into the permutation
49469 array in D. */
49470 memcpy (d.perm, sel, nelt);
49471 for (i = which = 0; i < nelt; ++i)
49473 unsigned char e = d.perm[i];
49474 gcc_assert (e < 2 * nelt);
49475 which |= (e < nelt ? 1 : 2);
49478 /* For all elements from second vector, fold the elements to first. */
49479 if (which == 2)
49480 for (i = 0; i < nelt; ++i)
49481 d.perm[i] -= nelt;
49483 /* Check whether the mask can be applied to the vector type. */
49484 d.one_operand_p = (which != 3);
49486 /* Implementable with shufps or pshufd. */
49487 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49488 return true;
49490 /* Otherwise we have to go through the motions and see if we can
49491 figure out how to generate the requested permutation. */
49492 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49493 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49494 if (!d.one_operand_p)
49495 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49497 start_sequence ();
49498 ret = ix86_expand_vec_perm_const_1 (&d);
49499 end_sequence ();
49501 return ret;
49504 void
49505 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49507 struct expand_vec_perm_d d;
49508 unsigned i, nelt;
49510 d.target = targ;
49511 d.op0 = op0;
49512 d.op1 = op1;
49513 d.vmode = GET_MODE (targ);
49514 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49515 d.one_operand_p = false;
49516 d.testing_p = false;
49518 for (i = 0; i < nelt; ++i)
49519 d.perm[i] = i * 2 + odd;
49521 /* We'll either be able to implement the permutation directly... */
49522 if (expand_vec_perm_1 (&d))
49523 return;
49525 /* ... or we use the special-case patterns. */
49526 expand_vec_perm_even_odd_1 (&d, odd);
49529 static void
49530 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49532 struct expand_vec_perm_d d;
49533 unsigned i, nelt, base;
49534 bool ok;
49536 d.target = targ;
49537 d.op0 = op0;
49538 d.op1 = op1;
49539 d.vmode = GET_MODE (targ);
49540 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49541 d.one_operand_p = false;
49542 d.testing_p = false;
49544 base = high_p ? nelt / 2 : 0;
49545 for (i = 0; i < nelt / 2; ++i)
49547 d.perm[i * 2] = i + base;
49548 d.perm[i * 2 + 1] = i + base + nelt;
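/* E.g. for V4SImode and high_p the selector built above is
   { 2, 6, 3, 7 }, pairing the high halves of the two inputs. */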
49551 /* Note that for AVX this isn't one instruction. */
49552 ok = ix86_expand_vec_perm_const_1 (&d);
49553 gcc_assert (ok);
49557 /* Expand a vector operation CODE for a V*QImode in terms of the
49558 same operation on V*HImode. */
49560 void
49561 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49563 machine_mode qimode = GET_MODE (dest);
49564 machine_mode himode;
49565 rtx (*gen_il) (rtx, rtx, rtx);
49566 rtx (*gen_ih) (rtx, rtx, rtx);
49567 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49568 struct expand_vec_perm_d d;
49569 bool ok, full_interleave;
49570 bool uns_p = false;
49571 int i;
49573 switch (qimode)
49575 case V16QImode:
49576 himode = V8HImode;
49577 gen_il = gen_vec_interleave_lowv16qi;
49578 gen_ih = gen_vec_interleave_highv16qi;
49579 break;
49580 case V32QImode:
49581 himode = V16HImode;
49582 gen_il = gen_avx2_interleave_lowv32qi;
49583 gen_ih = gen_avx2_interleave_highv32qi;
49584 break;
49585 case V64QImode:
49586 himode = V32HImode;
49587 gen_il = gen_avx512bw_interleave_lowv64qi;
49588 gen_ih = gen_avx512bw_interleave_highv64qi;
49589 break;
49590 default:
49591 gcc_unreachable ();
49594 op2_l = op2_h = op2;
49595 switch (code)
49597 case MULT:
49598 /* Unpack data such that we've got a source byte in each low byte of
49599 each word. We don't care what goes into the high byte of each word.
49600 Rather than trying to get zero in there, it is most convenient to let
49601 it be a copy of the low byte. */
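/* E.g. interleaving op2 with itself turns { B0 B1 B2 ... } into
   { B0 B0 B1 B1 ... }, so every word of op2_l/op2_h has the source
   byte in its low half (and a harmless copy in the high half), ready
   for the word-sized operation below. */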
49602 op2_l = gen_reg_rtx (qimode);
49603 op2_h = gen_reg_rtx (qimode);
49604 emit_insn (gen_il (op2_l, op2, op2));
49605 emit_insn (gen_ih (op2_h, op2, op2));
49606 /* FALLTHRU */
49608 op1_l = gen_reg_rtx (qimode);
49609 op1_h = gen_reg_rtx (qimode);
49610 emit_insn (gen_il (op1_l, op1, op1));
49611 emit_insn (gen_ih (op1_h, op1, op1));
49612 full_interleave = qimode == V16QImode;
49613 break;
49615 case ASHIFT:
49616 case LSHIFTRT:
49617 uns_p = true;
49618 /* FALLTHRU */
49619 case ASHIFTRT:
49620 op1_l = gen_reg_rtx (himode);
49621 op1_h = gen_reg_rtx (himode);
49622 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49623 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49624 full_interleave = true;
49625 break;
49626 default:
49627 gcc_unreachable ();
49630 /* Perform the operation. */
49631 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49632 1, OPTAB_DIRECT);
49633 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49634 1, OPTAB_DIRECT);
49635 gcc_assert (res_l && res_h);
49637 /* Merge the data back into the right place. */
49638 d.target = dest;
49639 d.op0 = gen_lowpart (qimode, res_l);
49640 d.op1 = gen_lowpart (qimode, res_h);
49641 d.vmode = qimode;
49642 d.nelt = GET_MODE_NUNITS (qimode);
49643 d.one_operand_p = false;
49644 d.testing_p = false;
49646 if (full_interleave)
49648 /* For SSE2, we used a full interleave, so the desired
49649 results are in the even elements. */
49650 for (i = 0; i < 64; ++i)
49651 d.perm[i] = i * 2;
49653 else
49655 /* For AVX, the interleave used above was not cross-lane. So the
49656 extraction is evens but with the second and third quarter swapped.
49657 Happily, that is even one insn shorter than even extraction. */
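/* E.g. for V32QImode the selector built below is
   { 0, 2, ..., 14, 32, 34, ..., 46, 16, 18, ..., 30, 48, 50, ..., 62 },
   i.e. even elements with the second and third quarters exchanged. */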
49658 for (i = 0; i < 64; ++i)
49659 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
49662 ok = ix86_expand_vec_perm_const_1 (&d);
49663 gcc_assert (ok);
49665 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49666 gen_rtx_fmt_ee (code, qimode, op1, op2));
49669 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49670 if op is CONST_VECTOR with all odd elements equal to their
49671 preceding element. */
49673 static bool
49674 const_vector_equal_evenodd_p (rtx op)
49676 machine_mode mode = GET_MODE (op);
49677 int i, nunits = GET_MODE_NUNITS (mode);
49678 if (GET_CODE (op) != CONST_VECTOR
49679 || nunits != CONST_VECTOR_NUNITS (op))
49680 return false;
49681 for (i = 0; i < nunits; i += 2)
49682 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
49683 return false;
49684 return true;
49687 void
49688 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49689 bool uns_p, bool odd_p)
49691 machine_mode mode = GET_MODE (op1);
49692 machine_mode wmode = GET_MODE (dest);
49693 rtx x;
49694 rtx orig_op1 = op1, orig_op2 = op2;
49696 if (!nonimmediate_operand (op1, mode))
49697 op1 = force_reg (mode, op1);
49698 if (!nonimmediate_operand (op2, mode))
49699 op2 = force_reg (mode, op2);
49701 /* We only play even/odd games with vectors of SImode. */
49702 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49704 /* If we're looking for the odd results, shift those members down to
49705 the even slots. For some cpus this is faster than a PSHUFD. */
49706 if (odd_p)
49708 /* For XOP use vpmacsdqh, but only for smult, as it is only
49709 signed. */
49710 if (TARGET_XOP && mode == V4SImode && !uns_p)
49712 x = force_reg (wmode, CONST0_RTX (wmode));
49713 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
49714 return;
49717 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49718 if (!const_vector_equal_evenodd_p (orig_op1))
49719 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49720 x, NULL, 1, OPTAB_DIRECT);
49721 if (!const_vector_equal_evenodd_p (orig_op2))
49722 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49723 x, NULL, 1, OPTAB_DIRECT);
49724 op1 = gen_lowpart (mode, op1);
49725 op2 = gen_lowpart (mode, op2);
49728 if (mode == V16SImode)
49730 if (uns_p)
49731 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49732 else
49733 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49735 else if (mode == V8SImode)
49737 if (uns_p)
49738 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49739 else
49740 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49742 else if (uns_p)
49743 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49744 else if (TARGET_SSE4_1)
49745 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49746 else
49748 rtx s1, s2, t0, t1, t2;
49750 /* The easiest way to implement this without PMULDQ is to go through
49751 the motions as if we are performing a full 64-bit multiply. With
49752 the exception that we need to do less shuffling of the elements. */
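/* The identity used here, with a and b viewed as unsigned 32-bit
   values, is
   (int64) a * (int64) b
   == a * b - (((a < 0 ? b : 0) + (b < 0 ? a : 0)) << 32)  (mod 2^64).
   The all-ones compare masks below supply the conditional terms,
   since (0xffffffff * x) << 32 == (-x) << 32 (mod 2^64). */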
49754 /* Compute the sign-extension, aka highparts, of the two operands. */
49755 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49756 op1, pc_rtx, pc_rtx);
49757 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49758 op2, pc_rtx, pc_rtx);
49760 /* Multiply LO(A) * HI(B), and vice-versa. */
49761 t1 = gen_reg_rtx (wmode);
49762 t2 = gen_reg_rtx (wmode);
49763 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49764 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49766 /* Multiply LO(A) * LO(B). */
49767 t0 = gen_reg_rtx (wmode);
49768 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49770 /* Combine and shift the highparts into place. */
49771 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49772 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49773 1, OPTAB_DIRECT);
49775 /* Combine high and low parts. */
49776 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
49777 return;
49779 emit_insn (x);
49782 void
49783 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49784 bool uns_p, bool high_p)
49786 machine_mode wmode = GET_MODE (dest);
49787 machine_mode mode = GET_MODE (op1);
49788 rtx t1, t2, t3, t4, mask;
49790 switch (mode)
49792 case V4SImode:
49793 t1 = gen_reg_rtx (mode);
49794 t2 = gen_reg_rtx (mode);
49795 if (TARGET_XOP && !uns_p)
49797 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49798 shuffle the elements once so that all elements are in the right
49799 place for immediate use: { A C B D }. */
49800 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49801 const1_rtx, GEN_INT (3)));
49802 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49803 const1_rtx, GEN_INT (3)));
49805 else
49807 /* Put the elements into place for the multiply. */
49808 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49809 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49810 high_p = false;
49812 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49813 break;
49815 case V8SImode:
49816 /* Shuffle the elements between the lanes. After this we
49817 have { A B E F | C D G H } for each operand. */
49818 t1 = gen_reg_rtx (V4DImode);
49819 t2 = gen_reg_rtx (V4DImode);
49820 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49821 const0_rtx, const2_rtx,
49822 const1_rtx, GEN_INT (3)));
49823 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49824 const0_rtx, const2_rtx,
49825 const1_rtx, GEN_INT (3)));
49827 /* Shuffle the elements within the lanes. After this we
49828 have { A A B B | C C D D } or { E E F F | G G H H }. */
49829 t3 = gen_reg_rtx (V8SImode);
49830 t4 = gen_reg_rtx (V8SImode);
49831 mask = GEN_INT (high_p
49832 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49833 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
49834 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49835 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49837 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
49838 break;
49840 case V8HImode:
49841 case V16HImode:
49842 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49843 uns_p, OPTAB_DIRECT);
49844 t2 = expand_binop (mode,
49845 uns_p ? umul_highpart_optab : smul_highpart_optab,
49846 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49847 gcc_assert (t1 && t2);
49849 t3 = gen_reg_rtx (mode);
49850 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49851 emit_move_insn (dest, gen_lowpart (wmode, t3));
49852 break;
49854 case V16QImode:
49855 case V32QImode:
49856 case V32HImode:
49857 case V16SImode:
49858 case V64QImode:
49859 t1 = gen_reg_rtx (wmode);
49860 t2 = gen_reg_rtx (wmode);
49861 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49862 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49864 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
49865 break;
49867 default:
49868 gcc_unreachable ();
49872 void
49873 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49875 rtx res_1, res_2, res_3, res_4;
49877 res_1 = gen_reg_rtx (V4SImode);
49878 res_2 = gen_reg_rtx (V4SImode);
49879 res_3 = gen_reg_rtx (V2DImode);
49880 res_4 = gen_reg_rtx (V2DImode);
49881 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
49882 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
49884 /* Move the results in element 2 down to element 1; we don't care
49885 what goes in elements 2 and 3. Then we can merge the parts
49886 back together with an interleave.
49888 Note that two other sequences were tried:
49889 (1) Use interleaves at the start instead of psrldq, which allows
49890 us to use a single shufps to merge things back at the end.
49891 (2) Use shufps here to combine the two vectors, then pshufd to
49892 put the elements in the correct order.
49893 In both cases the cost of the reformatting stall was too high
49894 and the overall sequence slower. */
49896 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
49897 const0_rtx, const2_rtx,
49898 const0_rtx, const0_rtx));
49899 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
49900 const0_rtx, const2_rtx,
49901 const0_rtx, const0_rtx));
49902 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
49904 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
49907 void
49908 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
49910 machine_mode mode = GET_MODE (op0);
49911 rtx t1, t2, t3, t4, t5, t6;
49913 if (TARGET_AVX512DQ && mode == V8DImode)
49914 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
49915 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
49916 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
49917 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
49918 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
49919 else if (TARGET_XOP && mode == V2DImode)
49921 /* op1: A,B,C,D, op2: E,F,G,H */
49922 op1 = gen_lowpart (V4SImode, op1);
49923 op2 = gen_lowpart (V4SImode, op2);
49925 t1 = gen_reg_rtx (V4SImode);
49926 t2 = gen_reg_rtx (V4SImode);
49927 t3 = gen_reg_rtx (V2DImode);
49928 t4 = gen_reg_rtx (V2DImode);
49930 /* t1: B,A,D,C */
49931 emit_insn (gen_sse2_pshufd_1 (t1, op1,
49932 GEN_INT (1),
49933 GEN_INT (0),
49934 GEN_INT (3),
49935 GEN_INT (2)));
49937 /* t2: (B*E),(A*F),(D*G),(C*H) */
49938 emit_insn (gen_mulv4si3 (t2, t1, op2));
49940 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
49941 emit_insn (gen_xop_phadddq (t3, t2));
49943 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
49944 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
49946 /* Multiply lower parts and add all */
49947 t5 = gen_reg_rtx (V2DImode);
49948 emit_insn (gen_vec_widen_umult_even_v4si (t5,
49949 gen_lowpart (V4SImode, op1),
49950 gen_lowpart (V4SImode, op2)));
49951 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
49954 else
49956 machine_mode nmode;
49957 rtx (*umul) (rtx, rtx, rtx);
49959 if (mode == V2DImode)
49961 umul = gen_vec_widen_umult_even_v4si;
49962 nmode = V4SImode;
49964 else if (mode == V4DImode)
49966 umul = gen_vec_widen_umult_even_v8si;
49967 nmode = V8SImode;
49969 else if (mode == V8DImode)
49971 umul = gen_vec_widen_umult_even_v16si;
49972 nmode = V16SImode;
49974 else
49975 gcc_unreachable ();
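/* Splitting each 64-bit element into 32-bit halves lo and hi, the
   product is computed as
   (hi1 * 2^32 + lo1) * (hi2 * 2^32 + lo2)
   == lo1 * lo2 + ((hi1 * lo2 + hi2 * lo1) << 32)  (mod 2^64),
   which needs only the unsigned widening multiplies below. */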
49978 /* Multiply low parts. */
49979 t1 = gen_reg_rtx (mode);
49980 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
49982 /* Shift input vectors right 32 bits so we can multiply high parts. */
49983 t6 = GEN_INT (32);
49984 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
49985 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
49987 /* Multiply high parts by low parts. */
49988 t4 = gen_reg_rtx (mode);
49989 t5 = gen_reg_rtx (mode);
49990 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
49991 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
49993 /* Combine and shift the highparts back. */
49994 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
49995 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
49997 /* Combine high and low parts. */
49998 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
50001 set_unique_reg_note (get_last_insn (), REG_EQUAL,
50002 gen_rtx_MULT (mode, op1, op2));
50005 /* Return 1 if control transfer instruction INSN
50006 should be encoded with bnd prefix.
50007 If insn is NULL then return 1 when control
50008 transfer instructions should be prefixed with
50009 bnd by default for current function. */
50011 bool
50012 ix86_bnd_prefixed_insn_p (rtx insn)
50014 /* For call insns check special flag. */
50015 if (insn && CALL_P (insn))
50017 rtx call = get_call_rtx_from (insn);
50018 if (call)
50019 return CALL_EXPR_WITH_BOUNDS_P (call);
50022 /* All other insns are prefixed only if function is instrumented. */
50023 return chkp_function_instrumented_p (current_function_decl);
50026 /* Calculate integer abs() using only SSE2 instructions. */
50028 void
50029 ix86_expand_sse2_abs (rtx target, rtx input)
50031 machine_mode mode = GET_MODE (target);
50032 rtx tmp0, tmp1, x;
50034 switch (mode)
50036 /* For 32-bit signed integer X, the best way to calculate the absolute
50037 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
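/* E.g. for X == -5: X >> 31 == -1, (-1 ^ -5) == 4 and
   4 - (-1) == 5. */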
50038 case V4SImode:
50039 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
50040 GEN_INT (GET_MODE_BITSIZE
50041 (GET_MODE_INNER (mode)) - 1),
50042 NULL, 0, OPTAB_DIRECT);
50043 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
50044 NULL, 0, OPTAB_DIRECT);
50045 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
50046 target, 0, OPTAB_DIRECT);
50047 break;
50049 /* For 16-bit signed integer X, the best way to calculate the absolute
50050 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
50051 case V8HImode:
50052 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50054 x = expand_simple_binop (mode, SMAX, tmp0, input,
50055 target, 0, OPTAB_DIRECT);
50056 break;
50058 /* For 8-bit signed integer X, the best way to calculate the absolute
50059 value of X is min ((unsigned char) X, (unsigned char) (-X)),
50060 as SSE2 provides the PMINUB insn. */
50061 case V16QImode:
50062 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50064 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
50065 target, 0, OPTAB_DIRECT);
50066 break;
50068 default:
50069 gcc_unreachable ();
50072 if (x != target)
50073 emit_move_insn (target, x);
50076 /* Expand an insert into a vector register through pinsr insn.
50077 Return true if successful. */
50079 bool
50080 ix86_expand_pinsr (rtx *operands)
50082 rtx dst = operands[0];
50083 rtx src = operands[3];
50085 unsigned int size = INTVAL (operands[1]);
50086 unsigned int pos = INTVAL (operands[2]);
50088 if (GET_CODE (dst) == SUBREG)
50090 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50091 dst = SUBREG_REG (dst);
50094 if (GET_CODE (src) == SUBREG)
50095 src = SUBREG_REG (src);
50097 switch (GET_MODE (dst))
50099 case V16QImode:
50100 case V8HImode:
50101 case V4SImode:
50102 case V2DImode:
50104 machine_mode srcmode, dstmode;
50105 rtx (*pinsr)(rtx, rtx, rtx, rtx);
50107 srcmode = mode_for_size (size, MODE_INT, 0);
50109 switch (srcmode)
50111 case QImode:
50112 if (!TARGET_SSE4_1)
50113 return false;
50114 dstmode = V16QImode;
50115 pinsr = gen_sse4_1_pinsrb;
50116 break;
50118 case HImode:
50119 if (!TARGET_SSE2)
50120 return false;
50121 dstmode = V8HImode;
50122 pinsr = gen_sse2_pinsrw;
50123 break;
50125 case SImode:
50126 if (!TARGET_SSE4_1)
50127 return false;
50128 dstmode = V4SImode;
50129 pinsr = gen_sse4_1_pinsrd;
50130 break;
50132 case DImode:
50133 gcc_assert (TARGET_64BIT);
50134 if (!TARGET_SSE4_1)
50135 return false;
50136 dstmode = V2DImode;
50137 pinsr = gen_sse4_1_pinsrq;
50138 break;
50140 default:
50141 return false;
50144 rtx d = dst;
50145 if (GET_MODE (dst) != dstmode)
50146 d = gen_reg_rtx (dstmode);
50147 src = gen_lowpart (srcmode, src);
50149 pos /= size;
50151 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
50152 GEN_INT (1 << pos)));
50153 if (d != dst)
50154 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50155 return true;
50158 default:
50159 return false;
50163 /* This function returns the calling abi specific va_list type node.
50164 It returns the FNDECL specific va_list type. */
50166 static tree
50167 ix86_fn_abi_va_list (tree fndecl)
50169 if (!TARGET_64BIT)
50170 return va_list_type_node;
50171 gcc_assert (fndecl != NULL_TREE);
50173 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50174 return ms_va_list_type_node;
50175 else
50176 return sysv_va_list_type_node;
50179 /* Returns the canonical va_list type specified by TYPE. If there
50180    is no valid TYPE provided, it returns NULL_TREE.  */
50182 static tree
50183 ix86_canonical_va_list_type (tree type)
50185 tree wtype, htype;
50187 /* Resolve references and pointers to va_list type. */
50188 if (TREE_CODE (type) == MEM_REF)
50189 type = TREE_TYPE (type);
50190 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
50191 type = TREE_TYPE (type);
50192 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50193 type = TREE_TYPE (type);
50195 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50197 wtype = va_list_type_node;
50198 gcc_assert (wtype != NULL_TREE);
50199 htype = type;
50200 if (TREE_CODE (wtype) == ARRAY_TYPE)
50202 /* If va_list is an array type, the argument may have decayed
50203 to a pointer type, e.g. by being passed to another function.
50204 In that case, unwrap both types so that we can compare the
50205 underlying records. */
50206 if (TREE_CODE (htype) == ARRAY_TYPE
50207 || POINTER_TYPE_P (htype))
50209 wtype = TREE_TYPE (wtype);
50210 htype = TREE_TYPE (htype);
50213 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50214 return va_list_type_node;
50215 wtype = sysv_va_list_type_node;
50216 gcc_assert (wtype != NULL_TREE);
50217 htype = type;
50218 if (TREE_CODE (wtype) == ARRAY_TYPE)
50220 /* If va_list is an array type, the argument may have decayed
50221 to a pointer type, e.g. by being passed to another function.
50222 In that case, unwrap both types so that we can compare the
50223 underlying records. */
50224 if (TREE_CODE (htype) == ARRAY_TYPE
50225 || POINTER_TYPE_P (htype))
50227 wtype = TREE_TYPE (wtype);
50228 htype = TREE_TYPE (htype);
50231 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50232 return sysv_va_list_type_node;
50233 wtype = ms_va_list_type_node;
50234 gcc_assert (wtype != NULL_TREE);
50235 htype = type;
50236 if (TREE_CODE (wtype) == ARRAY_TYPE)
50238 /* If va_list is an array type, the argument may have decayed
50239 to a pointer type, e.g. by being passed to another function.
50240 In that case, unwrap both types so that we can compare the
50241 underlying records. */
50242 if (TREE_CODE (htype) == ARRAY_TYPE
50243 || POINTER_TYPE_P (htype))
50245 wtype = TREE_TYPE (wtype);
50246 htype = TREE_TYPE (htype);
50249 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50250 return ms_va_list_type_node;
50251 return NULL_TREE;
50253 return std_canonical_va_list_type (type);
50256 /* Iterate through the target-specific builtin types for va_list.
50257 IDX denotes the iterator, *PTREE is set to the result type of
50258 the va_list builtin, and *PNAME to its internal type.
50259 Returns zero if there is no element for this index, otherwise
50260 IDX should be increased upon the next call.
50261 Note, do not iterate a base builtin's name like __builtin_va_list.
50262 Used from c_common_nodes_and_builtins. */
50264 static int
50265 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50267 if (TARGET_64BIT)
50269 switch (idx)
50271 default:
50272 break;
50274 case 0:
50275 *ptree = ms_va_list_type_node;
50276 *pname = "__builtin_ms_va_list";
50277 return 1;
50279 case 1:
50280 *ptree = sysv_va_list_type_node;
50281 *pname = "__builtin_sysv_va_list";
50282 return 1;
50286 return 0;
50289 #undef TARGET_SCHED_DISPATCH
50290 #define TARGET_SCHED_DISPATCH has_dispatch
50291 #undef TARGET_SCHED_DISPATCH_DO
50292 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50293 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50294 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50295 #undef TARGET_SCHED_REORDER
50296 #define TARGET_SCHED_REORDER ix86_sched_reorder
50297 #undef TARGET_SCHED_ADJUST_PRIORITY
50298 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50299 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50300 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50301 ix86_dependencies_evaluation_hook
50303 /* The size of the dispatch window is the total number of bytes of
50304 object code allowed in a window. */
50305 #define DISPATCH_WINDOW_SIZE 16
50307 /* Number of dispatch windows considered for scheduling. */
50308 #define MAX_DISPATCH_WINDOWS 3
50310 /* Maximum number of instructions in a window. */
50311 #define MAX_INSN 4
50313 /* Maximum number of immediate operands in a window. */
50314 #define MAX_IMM 4
50316 /* Maximum number of immediate bits allowed in a window. */
50317 #define MAX_IMM_SIZE 128
50319 /* Maximum number of 32 bit immediates allowed in a window. */
50320 #define MAX_IMM_32 4
50322 /* Maximum number of 64 bit immediates allowed in a window. */
50323 #define MAX_IMM_64 2
50325 /* Maximum total of loads or prefetches allowed in a window. */
50326 #define MAX_LOAD 2
50328 /* Maximum total of stores allowed in a window. */
50329 #define MAX_STORE 1
50331 #undef BIG
50332 #define BIG 100
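/* BIG acts as an effectively unlimited count for groups (compares and
   conditional jumps) that do not themselves constrain a dispatch window.  */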
50335 /* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
50336 enum dispatch_group {
50337 disp_no_group = 0,
50338 disp_load,
50339 disp_store,
50340 disp_load_store,
50341 disp_prefetch,
50342 disp_imm,
50343 disp_imm_32,
50344 disp_imm_64,
50345 disp_branch,
50346 disp_cmp,
50347 disp_jcc,
50348 disp_last
50351 /* Number of allowable groups in a dispatch window. It is an array
50352 indexed by dispatch_group enum. 100 is used as a big number,
50353    because the number of these kinds of operations does not have any
50354    effect in the dispatch window, but we need them for other reasons in
50355 the table. */
50356 static unsigned int num_allowable_groups[disp_last] = {
50357 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
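/* In enum order these limits are: no_group 0, load 2, store 1,
   load_store 1, prefetch 2, imm 4, imm_32 4, imm_64 2, branch 1,
   and BIG for cmp and jcc.  */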
50360 char group_name[disp_last + 1][16] = {
50361 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50362 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50363 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50366 /* Instruction path. */
50367 enum insn_path {
50368 no_path = 0,
50369 path_single, /* Single micro op. */
50370 path_double, /* Double micro op. */
50371   path_multi, /* Instructions with more than 2 micro ops.  */
50372 last_path
50375 /* sched_insn_info defines a window to the instructions scheduled in
50376 the basic block. It contains a pointer to the insn_info table and
50377 the instruction scheduled.
50379 Windows are allocated for each basic block and are linked
50380 together. */
50381 typedef struct sched_insn_info_s {
50382 rtx insn;
50383 enum dispatch_group group;
50384 enum insn_path path;
50385 int byte_len;
50386 int imm_bytes;
50387 } sched_insn_info;
50389 /* Linked list of dispatch windows. This is a two way list of
50390 dispatch windows of a basic block. It contains information about
50391 the number of uops in the window and the total number of
50392 instructions and of bytes in the object code for this dispatch
50393 window. */
50394 typedef struct dispatch_windows_s {
50395 int num_insn; /* Number of insn in the window. */
50396 int num_uops; /* Number of uops in the window. */
50397 int window_size; /* Number of bytes in the window. */
50398   int window_num;       /* Window number, either 0 or 1.  */
50399 int num_imm; /* Number of immediates in an insn. */
50400 int num_imm_32; /* Number of 32 bit immediates in an insn. */
50401 int num_imm_64; /* Number of 64 bit immediates in an insn. */
50402   int imm_size;         /* Total size of immediates in the window.  */
50403 int num_loads; /* Total memory loads in the window. */
50404 int num_stores; /* Total memory stores in the window. */
50405 int violation; /* Violation exists in window. */
50406 sched_insn_info *window; /* Pointer to the window. */
50407 struct dispatch_windows_s *next;
50408 struct dispatch_windows_s *prev;
50409 } dispatch_windows;
50411 /* Immediate values used in an insn.  */
50412 typedef struct imm_info_s
50414 int imm;
50415 int imm32;
50416 int imm64;
50417 } imm_info;
50419 static dispatch_windows *dispatch_window_list;
50420 static dispatch_windows *dispatch_window_list1;
50422 /* Get dispatch group of insn. */
50424 static enum dispatch_group
50425 get_mem_group (rtx_insn *insn)
50427 enum attr_memory memory;
50429 if (INSN_CODE (insn) < 0)
50430 return disp_no_group;
50431 memory = get_attr_memory (insn);
50432 if (memory == MEMORY_STORE)
50433 return disp_store;
50435 if (memory == MEMORY_LOAD)
50436 return disp_load;
50438 if (memory == MEMORY_BOTH)
50439 return disp_load_store;
50441 return disp_no_group;
50444 /* Return true if insn is a compare instruction. */
50446 static bool
50447 is_cmp (rtx_insn *insn)
50449 enum attr_type type;
50451 type = get_attr_type (insn);
50452 return (type == TYPE_TEST
50453 || type == TYPE_ICMP
50454 || type == TYPE_FCMP
50455 || GET_CODE (PATTERN (insn)) == COMPARE);
50458 /* Return true if a dispatch violation encountered. */
50460 static bool
50461 dispatch_violation (void)
50463 if (dispatch_window_list->next)
50464 return dispatch_window_list->next->violation;
50465 return dispatch_window_list->violation;
50468 /* Return true if insn is a branch instruction. */
50470 static bool
50471 is_branch (rtx_insn *insn)
50473 return (CALL_P (insn) || JUMP_P (insn));
50476 /* Return true if insn is a prefetch instruction. */
50478 static bool
50479 is_prefetch (rtx_insn *insn)
50481 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50484 /* This function initializes a dispatch window and the list container holding a
50485 pointer to the window. */
50487 static void
50488 init_window (int window_num)
50490 int i;
50491 dispatch_windows *new_list;
50493 if (window_num == 0)
50494 new_list = dispatch_window_list;
50495 else
50496 new_list = dispatch_window_list1;
50498 new_list->num_insn = 0;
50499 new_list->num_uops = 0;
50500 new_list->window_size = 0;
50501 new_list->next = NULL;
50502 new_list->prev = NULL;
50503 new_list->window_num = window_num;
50504 new_list->num_imm = 0;
50505 new_list->num_imm_32 = 0;
50506 new_list->num_imm_64 = 0;
50507 new_list->imm_size = 0;
50508 new_list->num_loads = 0;
50509 new_list->num_stores = 0;
50510 new_list->violation = false;
50512 for (i = 0; i < MAX_INSN; i++)
50514 new_list->window[i].insn = NULL;
50515 new_list->window[i].group = disp_no_group;
50516 new_list->window[i].path = no_path;
50517 new_list->window[i].byte_len = 0;
50518 new_list->window[i].imm_bytes = 0;
50520 return;
50523 /* This function allocates and initializes a dispatch window and the
50524 list container holding a pointer to the window. */
50526 static dispatch_windows *
50527 allocate_window (void)
50529 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
50530 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50532 return new_list;
50535 /* This routine initializes the dispatch scheduling information. It
50536 initiates building dispatch scheduler tables and constructs the
50537 first dispatch window. */
50539 static void
50540 init_dispatch_sched (void)
50542 /* Allocate a dispatch list and a window. */
50543 dispatch_window_list = allocate_window ();
50544 dispatch_window_list1 = allocate_window ();
50545 init_window (0);
50546 init_window (1);
50549 /* This function returns true if a branch is detected. End of a basic block
50550 does not have to be a branch, but here we assume only branches end a
50551 window. */
50553 static bool
50554 is_end_basic_block (enum dispatch_group group)
50556 return group == disp_branch;
50559 /* This function is called when the end of a window processing is reached. */
50561 static void
50562 process_end_window (void)
50564 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
50565 if (dispatch_window_list->next)
50567 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
50568 gcc_assert (dispatch_window_list->window_size
50569 + dispatch_window_list1->window_size <= 48);
50570 init_window (1);
50572 init_window (0);
50575 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
50576 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
50577 for 48 bytes of instructions. Note that these windows are not dispatch
50578    windows whose size is DISPATCH_WINDOW_SIZE.  */
50580 static dispatch_windows *
50581 allocate_next_window (int window_num)
50583 if (window_num == 0)
50585 if (dispatch_window_list->next)
50586 init_window (1);
50587 init_window (0);
50588 return dispatch_window_list;
50591 dispatch_window_list->next = dispatch_window_list1;
50592 dispatch_window_list1->prev = dispatch_window_list;
50594 return dispatch_window_list1;
50597 /* Compute number of immediate operands of an instruction. */
50599 static void
50600 find_constant (rtx in_rtx, imm_info *imm_values)
50602 if (INSN_P (in_rtx))
50603 in_rtx = PATTERN (in_rtx);
50604 subrtx_iterator::array_type array;
50605 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
50606 if (const_rtx x = *iter)
50607 switch (GET_CODE (x))
50609 case CONST:
50610 case SYMBOL_REF:
50611 case CONST_INT:
50612 (imm_values->imm)++;
50613 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
50614 (imm_values->imm32)++;
50615 else
50616 (imm_values->imm64)++;
50617 break;
50619 case CONST_DOUBLE:
50620 case CONST_WIDE_INT:
50621 (imm_values->imm)++;
50622 (imm_values->imm64)++;
50623 break;
50625 case CODE_LABEL:
50626 if (LABEL_KIND (x) == LABEL_NORMAL)
50628 (imm_values->imm)++;
50629 (imm_values->imm32)++;
50631 break;
50633 default:
50634 break;
50638 /* Return total size of immediate operands of an instruction along with number
50639 of corresponding immediate-operands. It initializes its parameters to zero
50640    before calling FIND_CONSTANT.
50641 INSN is the input instruction. IMM is the total of immediates.
50642 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
50643 bit immediates. */
50645 static int
50646 get_num_immediates (rtx_insn *insn, int *imm, int *imm32, int *imm64)
50648 imm_info imm_values = {0, 0, 0};
50650 find_constant (insn, &imm_values);
50651 *imm = imm_values.imm;
50652 *imm32 = imm_values.imm32;
50653 *imm64 = imm_values.imm64;
50654 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
50657 /* This function indicates whether an instruction has any immediate
50658    operands.  */
50660 static bool
50661 has_immediate (rtx_insn *insn)
50663 int num_imm_operand;
50664 int num_imm32_operand;
50665 int num_imm64_operand;
50667 if (insn)
50668 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50669 &num_imm64_operand);
50670 return false;
50673 /* Return single or double path for instructions. */
50675 static enum insn_path
50676 get_insn_path (rtx_insn *insn)
50678 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
50680 if ((int)path == 0)
50681 return path_single;
50683 if ((int)path == 1)
50684 return path_double;
50686 return path_multi;
50689 /* Return insn dispatch group. */
50691 static enum dispatch_group
50692 get_insn_group (rtx_insn *insn)
50694 enum dispatch_group group = get_mem_group (insn);
50695 if (group)
50696 return group;
50698 if (is_branch (insn))
50699 return disp_branch;
50701 if (is_cmp (insn))
50702 return disp_cmp;
50704 if (has_immediate (insn))
50705 return disp_imm;
50707 if (is_prefetch (insn))
50708 return disp_prefetch;
50710 return disp_no_group;
50713 /* Count number of GROUP restricted instructions in a dispatch
50714 window WINDOW_LIST. */
50716 static int
50717 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
50719 enum dispatch_group group = get_insn_group (insn);
50720 int imm_size;
50721 int num_imm_operand;
50722 int num_imm32_operand;
50723 int num_imm64_operand;
50725 if (group == disp_no_group)
50726 return 0;
50728 if (group == disp_imm)
50730 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50731 &num_imm64_operand);
50732 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
50733 || num_imm_operand + window_list->num_imm > MAX_IMM
50734 || (num_imm32_operand > 0
50735 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
50736 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
50737 || (num_imm64_operand > 0
50738 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
50739 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
50740 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
50741 && num_imm64_operand > 0
50742 && ((window_list->num_imm_64 > 0
50743 && window_list->num_insn >= 2)
50744 || window_list->num_insn >= 3)))
50745 return BIG;
50747 return 1;
50750 if ((group == disp_load_store
50751 && (window_list->num_loads >= MAX_LOAD
50752 || window_list->num_stores >= MAX_STORE))
50753 || ((group == disp_load
50754 || group == disp_prefetch)
50755 && window_list->num_loads >= MAX_LOAD)
50756 || (group == disp_store
50757 && window_list->num_stores >= MAX_STORE))
50758 return BIG;
50760 return 1;
50763 /* This function returns true if insn satisfies dispatch rules on the
50764 last window scheduled. */
50766 static bool
50767 fits_dispatch_window (rtx_insn *insn)
50769 dispatch_windows *window_list = dispatch_window_list;
50770 dispatch_windows *window_list_next = dispatch_window_list->next;
50771 unsigned int num_restrict;
50772 enum dispatch_group group = get_insn_group (insn);
50773 enum insn_path path = get_insn_path (insn);
50774 int sum;
50776 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
50777 instructions should be given the lowest priority in the
50778 scheduling process in Haifa scheduler to make sure they will be
50779 scheduled in the same dispatch window as the reference to them. */
50780 if (group == disp_jcc || group == disp_cmp)
50781 return false;
50783 /* Check nonrestricted. */
50784 if (group == disp_no_group || group == disp_branch)
50785 return true;
50787 /* Get last dispatch window. */
50788 if (window_list_next)
50789 window_list = window_list_next;
50791 if (window_list->window_num == 1)
50793 sum = window_list->prev->window_size + window_list->window_size;
50795 if (sum == 32
50796 || (min_insn_size (insn) + sum) >= 48)
50797 /* Window 1 is full. Go for next window. */
50798 return true;
50801 num_restrict = count_num_restricted (insn, window_list);
50803 if (num_restrict > num_allowable_groups[group])
50804 return false;
50806 /* See if it fits in the first window. */
50807 if (window_list->window_num == 0)
50809       /* The first window should have only single and double path
50810 uops. */
50811 if (path == path_double
50812 && (window_list->num_uops + 2) > MAX_INSN)
50813 return false;
50814 else if (path != path_single)
50815 return false;
50817 return true;
50820 /* Add an instruction INSN with NUM_UOPS micro-operations to the
50821 dispatch window WINDOW_LIST. */
50823 static void
50824 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
50826 int byte_len = min_insn_size (insn);
50827 int num_insn = window_list->num_insn;
50828 int imm_size;
50829 sched_insn_info *window = window_list->window;
50830 enum dispatch_group group = get_insn_group (insn);
50831 enum insn_path path = get_insn_path (insn);
50832 int num_imm_operand;
50833 int num_imm32_operand;
50834 int num_imm64_operand;
50836 if (!window_list->violation && group != disp_cmp
50837 && !fits_dispatch_window (insn))
50838 window_list->violation = true;
50840 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50841 &num_imm64_operand);
50843 /* Initialize window with new instruction. */
50844 window[num_insn].insn = insn;
50845 window[num_insn].byte_len = byte_len;
50846 window[num_insn].group = group;
50847 window[num_insn].path = path;
50848 window[num_insn].imm_bytes = imm_size;
50850 window_list->window_size += byte_len;
50851 window_list->num_insn = num_insn + 1;
50852 window_list->num_uops = window_list->num_uops + num_uops;
50853 window_list->imm_size += imm_size;
50854 window_list->num_imm += num_imm_operand;
50855 window_list->num_imm_32 += num_imm32_operand;
50856 window_list->num_imm_64 += num_imm64_operand;
50858 if (group == disp_store)
50859 window_list->num_stores += 1;
50860 else if (group == disp_load
50861 || group == disp_prefetch)
50862 window_list->num_loads += 1;
50863 else if (group == disp_load_store)
50865 window_list->num_stores += 1;
50866 window_list->num_loads += 1;
50870 /* Adds a scheduled instruction, INSN, to the current dispatch window.
50871 If the total bytes of instructions or the number of instructions in
50872    the window exceed the allowable limits, it allocates a new window.  */
50874 static void
50875 add_to_dispatch_window (rtx_insn *insn)
50877 int byte_len;
50878 dispatch_windows *window_list;
50879 dispatch_windows *next_list;
50880 dispatch_windows *window0_list;
50881 enum insn_path path;
50882 enum dispatch_group insn_group;
50883 bool insn_fits;
50884 int num_insn;
50885 int num_uops;
50886 int window_num;
50887 int insn_num_uops;
50888 int sum;
50890 if (INSN_CODE (insn) < 0)
50891 return;
50893 byte_len = min_insn_size (insn);
50894 window_list = dispatch_window_list;
50895 next_list = window_list->next;
50896 path = get_insn_path (insn);
50897 insn_group = get_insn_group (insn);
50899 /* Get the last dispatch window. */
50900 if (next_list)
50901 window_list = dispatch_window_list->next;
50903 if (path == path_single)
50904 insn_num_uops = 1;
50905 else if (path == path_double)
50906 insn_num_uops = 2;
50907 else
50908 insn_num_uops = (int) path;
50910   /* If the current window is full, get a new window.
50911      Window number zero is full if MAX_INSN uops are scheduled in it.
50912      Window number one is full if window zero's bytes plus window
50913      one's bytes reach 32, or if adding the bytes of the new instruction
50914      pushes the total past 48, or if it already has MAX_INSN
50915      instructions in it.  */
50916 num_insn = window_list->num_insn;
50917 num_uops = window_list->num_uops;
50918 window_num = window_list->window_num;
50919 insn_fits = fits_dispatch_window (insn);
50921 if (num_insn >= MAX_INSN
50922 || num_uops + insn_num_uops > MAX_INSN
50923 || !(insn_fits))
50925 window_num = ~window_num & 1;
50926 window_list = allocate_next_window (window_num);
50929 if (window_num == 0)
50931 add_insn_window (insn, window_list, insn_num_uops);
50932 if (window_list->num_insn >= MAX_INSN
50933 && insn_group == disp_branch)
50935 process_end_window ();
50936 return;
50939 else if (window_num == 1)
50941 window0_list = window_list->prev;
50942 sum = window0_list->window_size + window_list->window_size;
50943 if (sum == 32
50944 || (byte_len + sum) >= 48)
50946 process_end_window ();
50947 window_list = dispatch_window_list;
50950 add_insn_window (insn, window_list, insn_num_uops);
50952 else
50953 gcc_unreachable ();
50955 if (is_end_basic_block (insn_group))
50957       /* End of basic block is reached; do end-basic-block processing.  */
50958 process_end_window ();
50959 return;
50963 /* Print the dispatch window, WINDOW_NUM, to FILE. */
50965 DEBUG_FUNCTION static void
50966 debug_dispatch_window_file (FILE *file, int window_num)
50968 dispatch_windows *list;
50969 int i;
50971 if (window_num == 0)
50972 list = dispatch_window_list;
50973 else
50974 list = dispatch_window_list1;
50976 fprintf (file, "Window #%d:\n", list->window_num);
50977 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
50978 list->num_insn, list->num_uops, list->window_size);
50979 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50980 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
50982 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
50983 list->num_stores);
50984 fprintf (file, " insn info:\n");
50986 for (i = 0; i < MAX_INSN; i++)
50988 if (!list->window[i].insn)
50989 break;
50990 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
50991 i, group_name[list->window[i].group],
50992 i, (void *)list->window[i].insn,
50993 i, list->window[i].path,
50994 i, list->window[i].byte_len,
50995 i, list->window[i].imm_bytes);
50999 /* Print to stdout a dispatch window. */
51001 DEBUG_FUNCTION void
51002 debug_dispatch_window (int window_num)
51004 debug_dispatch_window_file (stdout, window_num);
51007 /* Print INSN dispatch information to FILE. */
51009 DEBUG_FUNCTION static void
51010 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
51012 int byte_len;
51013 enum insn_path path;
51014 enum dispatch_group group;
51015 int imm_size;
51016 int num_imm_operand;
51017 int num_imm32_operand;
51018 int num_imm64_operand;
51020 if (INSN_CODE (insn) < 0)
51021 return;
51023 byte_len = min_insn_size (insn);
51024 path = get_insn_path (insn);
51025 group = get_insn_group (insn);
51026 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51027 &num_imm64_operand);
51029 fprintf (file, " insn info:\n");
51030 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
51031 group_name[group], path, byte_len);
51032 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51033 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
51036 /* Print to stdout the status of the ready list with respect to
51037 dispatch windows. */
51039 DEBUG_FUNCTION void
51040 debug_ready_dispatch (void)
51042 int i;
51043 int no_ready = number_in_ready ();
51045 fprintf (stdout, "Number of ready: %d\n", no_ready);
51047 for (i = 0; i < no_ready; i++)
51048 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
51051 /* This routine is the driver of the dispatch scheduler. */
51053 static void
51054 do_dispatch (rtx_insn *insn, int mode)
51056 if (mode == DISPATCH_INIT)
51057 init_dispatch_sched ();
51058 else if (mode == ADD_TO_DISPATCH_WINDOW)
51059 add_to_dispatch_window (insn);
51062 /* Return TRUE if Dispatch Scheduling is supported. */
51064 static bool
51065 has_dispatch (rtx_insn *insn, int action)
51067 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
51068 && flag_dispatch_scheduler)
51069 switch (action)
51071 default:
51072 return false;
51074 case IS_DISPATCH_ON:
51075 return true;
51076 break;
51078 case IS_CMP:
51079 return is_cmp (insn);
51081 case DISPATCH_VIOLATION:
51082 return dispatch_violation ();
51084 case FITS_DISPATCH_WINDOW:
51085 return fits_dispatch_window (insn);
51088 return false;
51091 /* Implementation of reassociation_width target hook used by
51092 reassoc phase to identify parallelism level in reassociated
51093 tree. Statements tree_code is passed in OPC. Arguments type
51094 is passed in MODE.
51096 Currently parallel reassociation is enabled for Atom
51097 processors only and we set reassociation width to be 2
51098 because Atom may issue up to 2 instructions per cycle.
51100 Return value should be fixed if parallel reassociation is
51101 enabled for other processors. */
51103 static int
51104 ix86_reassociation_width (unsigned int, machine_mode mode)
51106 /* Vector part. */
51107 if (VECTOR_MODE_P (mode))
51109 if (TARGET_VECTOR_PARALLEL_EXECUTION)
51110 return 2;
51111 else
51112 return 1;
51115 /* Scalar part. */
51116 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51117 return 2;
51118 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
51119 return 2;
51120 else
51121 return 1;
51124 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51125 place emms and femms instructions. */
51127 static machine_mode
51128 ix86_preferred_simd_mode (machine_mode mode)
51130 if (!TARGET_SSE)
51131 return word_mode;
51133 switch (mode)
51135 case QImode:
51136 return TARGET_AVX512BW ? V64QImode :
51137 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51138 case HImode:
51139 return TARGET_AVX512BW ? V32HImode :
51140 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51141 case SImode:
51142 return TARGET_AVX512F ? V16SImode :
51143 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51144 case DImode:
51145 return TARGET_AVX512F ? V8DImode :
51146 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51148 case SFmode:
51149 if (TARGET_AVX512F)
51150 return V16SFmode;
51151 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51152 return V8SFmode;
51153 else
51154 return V4SFmode;
51156 case DFmode:
51157 if (!TARGET_VECTORIZE_DOUBLE)
51158 return word_mode;
51159 else if (TARGET_AVX512F)
51160 return V8DFmode;
51161 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51162 return V4DFmode;
51163 else if (TARGET_SSE2)
51164 return V2DFmode;
51165 /* FALLTHRU */
51167 default:
51168 return word_mode;
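/* For example, with -mavx2 (and neither AVX-512 nor -mprefer-avx128)
   QImode elements vectorize in V32QImode and SImode elements in V8SImode;
   with only SSE2 they map to V16QImode and V4SImode.  */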
51172 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51173 vectors. If AVX512F is enabled then try vectorizing with 512bit,
51174 256bit and 128bit vectors. */
51176 static unsigned int
51177 ix86_autovectorize_vector_sizes (void)
51179 return TARGET_AVX512F ? 64 | 32 | 16 :
51180 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
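/* The result is a bit mask of vector sizes in bytes; e.g. plain AVX2
   yields 32 | 16, so the vectorizer may try both 256-bit and 128-bit
   vectors.  */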
51185 /* Return class of registers which could be used for pseudo of MODE
51186 and of class RCLASS for spilling instead of memory. Return NO_REGS
51187 if it is not possible or non-profitable. */
51188 static reg_class_t
51189 ix86_spill_class (reg_class_t rclass, machine_mode mode)
51191 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51192 && (mode == SImode || (TARGET_64BIT && mode == DImode))
51193 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51194 return ALL_SSE_REGS;
51195 return NO_REGS;
51198 /* Implement targetm.vectorize.init_cost. */
51200 static void *
51201 ix86_init_cost (struct loop *)
51203 unsigned *cost = XNEWVEC (unsigned, 3);
51204 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51205 return cost;
51208 /* Implement targetm.vectorize.add_stmt_cost. */
51210 static unsigned
51211 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51212 struct _stmt_vec_info *stmt_info, int misalign,
51213 enum vect_cost_model_location where)
51215 unsigned *cost = (unsigned *) data;
51216 unsigned retval = 0;
51218 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51219 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51221 /* Statements in an inner loop relative to the loop being
51222 vectorized are weighted more heavily. The value here is
51223 arbitrary and could potentially be improved with analysis. */
51224 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51225 count *= 50; /* FIXME. */
51227 retval = (unsigned) (count * stmt_cost);
51229 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
51230      for Silvermont, as it has an out-of-order integer pipeline and can execute
51231      2 scalar instructions per tick, but has an in-order SIMD pipeline.  */
51232 if (TARGET_SILVERMONT || TARGET_INTEL)
51233 if (stmt_info && stmt_info->stmt)
51235 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51236 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51237 retval = (retval * 17) / 10;
51240 cost[where] += retval;
51242 return retval;
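/* For example, with the Silvermont/Intel adjustment above a vector
   statement with an integer result and a base cost of 10 ends up
   costing (10 * 17) / 10 = 17.  */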
51245 /* Implement targetm.vectorize.finish_cost. */
51247 static void
51248 ix86_finish_cost (void *data, unsigned *prologue_cost,
51249 unsigned *body_cost, unsigned *epilogue_cost)
51251 unsigned *cost = (unsigned *) data;
51252 *prologue_cost = cost[vect_prologue];
51253 *body_cost = cost[vect_body];
51254 *epilogue_cost = cost[vect_epilogue];
51257 /* Implement targetm.vectorize.destroy_cost_data. */
51259 static void
51260 ix86_destroy_cost_data (void *data)
51262 free (data);
51265 /* Validate target specific memory model bits in VAL. */
51267 static unsigned HOST_WIDE_INT
51268 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51270 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
51271 bool strong;
51273 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51274 |MEMMODEL_MASK)
51275 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51277 warning (OPT_Winvalid_memory_model,
51278 "Unknown architecture specific memory model");
51279 return MEMMODEL_SEQ_CST;
51281 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
51282 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
51284 warning (OPT_Winvalid_memory_model,
51285 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51286 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51288 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
51290 warning (OPT_Winvalid_memory_model,
51291 "HLE_RELEASE not used with RELEASE or stronger memory model");
51292 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51294 return val;
51297 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51298 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51299 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51300 or number of vecsize_mangle variants that should be emitted. */
51302 static int
51303 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51304 struct cgraph_simd_clone *clonei,
51305 tree base_type, int num)
51307 int ret = 1;
51309 if (clonei->simdlen
51310 && (clonei->simdlen < 2
51311 || clonei->simdlen > 16
51312 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51314 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51315 "unsupported simdlen %d", clonei->simdlen);
51316 return 0;
51319 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51320 if (TREE_CODE (ret_type) != VOID_TYPE)
51321 switch (TYPE_MODE (ret_type))
51323 case QImode:
51324 case HImode:
51325 case SImode:
51326 case DImode:
51327 case SFmode:
51328 case DFmode:
51329 /* case SCmode: */
51330 /* case DCmode: */
51331 break;
51332 default:
51333 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51334 "unsupported return type %qT for simd\n", ret_type);
51335 return 0;
51338 tree t;
51339 int i;
51341 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51342 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51343 switch (TYPE_MODE (TREE_TYPE (t)))
51345 case QImode:
51346 case HImode:
51347 case SImode:
51348 case DImode:
51349 case SFmode:
51350 case DFmode:
51351 /* case SCmode: */
51352 /* case DCmode: */
51353 break;
51354 default:
51355 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51356 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
51357 return 0;
51360 if (clonei->cilk_elemental)
51362       /* Parse the processor clause here.  If not present, default to 'b'.  */
51363 clonei->vecsize_mangle = 'b';
51365 else if (!TREE_PUBLIC (node->decl))
51367 /* If the function isn't exported, we can pick up just one ISA
51368 for the clones. */
51369 if (TARGET_AVX2)
51370 clonei->vecsize_mangle = 'd';
51371 else if (TARGET_AVX)
51372 clonei->vecsize_mangle = 'c';
51373 else
51374 clonei->vecsize_mangle = 'b';
51375 ret = 1;
51377 else
51379 clonei->vecsize_mangle = "bcd"[num];
51380 ret = 3;
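  /* The mangle letters chosen above match the ISAs enforced by
     ix86_simd_clone_adjust below: 'b' requires SSE2, 'c' AVX and 'd' AVX2.  */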
51382 switch (clonei->vecsize_mangle)
51384 case 'b':
51385 clonei->vecsize_int = 128;
51386 clonei->vecsize_float = 128;
51387 break;
51388 case 'c':
51389 clonei->vecsize_int = 128;
51390 clonei->vecsize_float = 256;
51391 break;
51392 case 'd':
51393 clonei->vecsize_int = 256;
51394 clonei->vecsize_float = 256;
51395 break;
51397 if (clonei->simdlen == 0)
51399 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51400 clonei->simdlen = clonei->vecsize_int;
51401 else
51402 clonei->simdlen = clonei->vecsize_float;
51403 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51404 if (clonei->simdlen > 16)
51405 clonei->simdlen = 16;
51407 return ret;
51410 /* Add target attribute to SIMD clone NODE if needed. */
51412 static void
51413 ix86_simd_clone_adjust (struct cgraph_node *node)
51415 const char *str = NULL;
51416 gcc_assert (node->decl == cfun->decl);
51417 switch (node->simdclone->vecsize_mangle)
51419 case 'b':
51420 if (!TARGET_SSE2)
51421 str = "sse2";
51422 break;
51423 case 'c':
51424 if (!TARGET_AVX)
51425 str = "avx";
51426 break;
51427 case 'd':
51428 if (!TARGET_AVX2)
51429 str = "avx2";
51430 break;
51431 default:
51432 gcc_unreachable ();
51434 if (str == NULL)
51435 return;
51436 push_cfun (NULL);
51437 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51438 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51439 gcc_assert (ok);
51440 pop_cfun ();
51441 ix86_reset_previous_fndecl ();
51442 ix86_set_current_function (node->decl);
51445 /* If SIMD clone NODE can't be used in a vectorized loop
51446 in current function, return -1, otherwise return a badness of using it
51447 (0 if it is most desirable from vecsize_mangle point of view, 1
51448 slightly less desirable, etc.). */
51450 static int
51451 ix86_simd_clone_usable (struct cgraph_node *node)
51453 switch (node->simdclone->vecsize_mangle)
51455 case 'b':
51456 if (!TARGET_SSE2)
51457 return -1;
51458 if (!TARGET_AVX)
51459 return 0;
51460 return TARGET_AVX2 ? 2 : 1;
51461 case 'c':
51462 if (!TARGET_AVX)
51463 return -1;
51464 return TARGET_AVX2 ? 1 : 0;
51465 break;
51466 case 'd':
51467 if (!TARGET_AVX2)
51468 return -1;
51469 return 0;
51470 default:
51471 gcc_unreachable ();
51475 /* This function adjusts the unroll factor based on
51476    the hardware capabilities.  For example, bdver3 has
51477    a loop buffer which makes unrolling of smaller
51478    loops less important.  This function decides the
51479    unroll factor using the number of memory references
51480    (the value 32 is used) as a heuristic.  */
51482 static unsigned
51483 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51485 basic_block *bbs;
51486 rtx_insn *insn;
51487 unsigned i;
51488 unsigned mem_count = 0;
51490 if (!TARGET_ADJUST_UNROLL)
51491 return nunroll;
51493 /* Count the number of memory references within the loop body.
51494 This value determines the unrolling factor for bdver3 and bdver4
51495 architectures. */
51496 subrtx_iterator::array_type array;
51497 bbs = get_loop_body (loop);
51498 for (i = 0; i < loop->num_nodes; i++)
51499 FOR_BB_INSNS (bbs[i], insn)
51500 if (NONDEBUG_INSN_P (insn))
51501 FOR_EACH_SUBRTX (iter, array, insn, NONCONST)
51502 if (const_rtx x = *iter)
51503 if (MEM_P (x))
51505 machine_mode mode = GET_MODE (x);
51506 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
51507 if (n_words > 4)
51508 mem_count += 2;
51509 else
51510 mem_count += 1;
51512 free (bbs);
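  /* For the bdver3/bdver4 tuning described above the unroll factor is
     tied to the memory reference count: e.g. 8 counted references give
     32 / 8 = 4.  */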
51514   if (mem_count && mem_count <= 32)
51515     return 32 / mem_count;
51517 return nunroll;
51521 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51523 static bool
51524 ix86_float_exceptions_rounding_supported_p (void)
51526 /* For x87 floating point with standard excess precision handling,
51527 there is no adddf3 pattern (since x87 floating point only has
51528 XFmode operations) so the default hook implementation gets this
51529 wrong. */
51530 return TARGET_80387 || TARGET_SSE_MATH;
51533 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
51535 static void
51536 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
51538 if (!TARGET_80387 && !TARGET_SSE_MATH)
51539 return;
51540 tree exceptions_var = create_tmp_var (integer_type_node);
51541 if (TARGET_80387)
51543 tree fenv_index_type = build_index_type (size_int (6));
51544 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51545 tree fenv_var = create_tmp_var (fenv_type);
51546 mark_addressable (fenv_var);
51547 tree fenv_ptr = build_pointer_type (fenv_type);
51548 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51549 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51550 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51551 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51552 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51553 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51554 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51555 tree hold_fnclex = build_call_expr (fnclex, 0);
51556 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51557 hold_fnclex);
51558 *clear = build_call_expr (fnclex, 0);
51559 tree sw_var = create_tmp_var (short_unsigned_type_node);
51560 tree fnstsw_call = build_call_expr (fnstsw, 0);
51561 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51562 sw_var, fnstsw_call);
51563 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51564 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51565 exceptions_var, exceptions_x87);
51566 *update = build2 (COMPOUND_EXPR, integer_type_node,
51567 sw_mod, update_mod);
51568 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51569 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
51571 if (TARGET_SSE_MATH)
51573 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
51574 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
51575 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
51576 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
51577 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
51578 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
51579 mxcsr_orig_var, stmxcsr_hold_call);
51580 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
51581 mxcsr_orig_var,
51582 build_int_cst (unsigned_type_node, 0x1f80));
51583 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
51584 build_int_cst (unsigned_type_node, 0xffffffc0));
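      /* 0x1f80 sets the six exception mask bits of MXCSR (bits 7-12) and
         0xffffffc0 clears the six sticky exception flags (bits 0-5), so the
         held MXCSR value masks and clears all SSE exceptions.  */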
51585 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
51586 mxcsr_mod_var, hold_mod_val);
51587 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51588 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
51589 hold_assign_orig, hold_assign_mod);
51590 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
51591 ldmxcsr_hold_call);
51592 if (*hold)
51593 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
51594 else
51595 *hold = hold_all;
51596 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51597 if (*clear)
51598 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
51599 ldmxcsr_clear_call);
51600 else
51601 *clear = ldmxcsr_clear_call;
51602 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
51603 tree exceptions_sse = fold_convert (integer_type_node,
51604 stxmcsr_update_call);
51605 if (*update)
51607 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
51608 exceptions_var, exceptions_sse);
51609 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
51610 exceptions_var, exceptions_mod);
51611 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
51612 exceptions_assign);
51614 else
51615 *update = build2 (MODIFY_EXPR, integer_type_node,
51616 exceptions_var, exceptions_sse);
51617 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
51618 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51619 ldmxcsr_update_call);
51621 tree atomic_feraiseexcept
51622 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
51623 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
51624 1, exceptions_var);
51625 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51626 atomic_feraiseexcept_call);
51629 /* Return mode to be used for bounds or VOIDmode
51630 if bounds are not supported. */
51632 static enum machine_mode
51633 ix86_mpx_bound_mode ()
51635 /* Do not support pointer checker if MPX
51636 is not enabled. */
51637 if (!TARGET_MPX)
51639 if (flag_check_pointer_bounds)
51640     warning (0, "Pointer Checker requires MPX support on this target."
51641                 " Use the -mmpx option to enable MPX.");
51642 return VOIDmode;
51645 return BNDmode;
51648 /* Return constant used to statically initialize constant bounds.
51650 This function is used to create special bound values. For now
51651 only INIT bounds and NONE bounds are expected. More special
51652 values may be added later. */
51654 static tree
51655 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
51657 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
51658 : build_zero_cst (pointer_sized_int_node);
51659 tree high = ub ? build_zero_cst (pointer_sized_int_node)
51660 : build_minus_one_cst (pointer_sized_int_node);
51662 /* This function is supposed to be used to create INIT and
51663 NONE bounds only. */
51664 gcc_assert ((lb == 0 && ub == -1)
51665 || (lb == -1 && ub == 0));
51667 return build_complex (NULL, low, high);
51670 /* Generate a list of statements STMTS to initialize pointer bounds
51671 variable VAR with bounds LB and UB. Return the number of generated
51672 statements. */
51674 static int
51675 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
51677 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
51678 tree lhs, modify, var_p;
51680 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
51681 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
51683 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
51684 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
51685 append_to_statement_list (modify, stmts);
51687 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
51688 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
51689 TYPE_SIZE_UNIT (pointer_sized_int_node)));
51690 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
51691 append_to_statement_list (modify, stmts);
51693 return 2;
51696 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
51697 /* For i386, common symbol is local only for non-PIE binaries. For
51698    x86-64, common symbol is local only for non-PIE binaries or if the linker
51699    supports copy reloc in PIE binaries.  */
51701 static bool
51702 ix86_binds_local_p (const_tree exp)
51704 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
51705 (!flag_pic
51706 || (TARGET_64BIT
51707 && HAVE_LD_PIE_COPYRELOC != 0)));
51709 #endif
51711 /* If MEM is in the form of [base+offset], extract the two parts
51712 of address and set to BASE and OFFSET, otherwise return false. */
51714 static bool
51715 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
51717 rtx addr;
51719 gcc_assert (MEM_P (mem));
51721 addr = XEXP (mem, 0);
51723 if (GET_CODE (addr) == CONST)
51724 addr = XEXP (addr, 0);
51726 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
51728 *base = addr;
51729 *offset = const0_rtx;
51730 return true;
51733 if (GET_CODE (addr) == PLUS
51734 && (REG_P (XEXP (addr, 0))
51735 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
51736 && CONST_INT_P (XEXP (addr, 1)))
51738 *base = XEXP (addr, 0);
51739 *offset = XEXP (addr, 1);
51740 return true;
51743 return false;
51746 /* Given OPERANDS of consecutive load/store, check if we can merge
51747 them into move multiple. LOAD is true if they are load instructions.
51748 MODE is the mode of memory operands. */
51750 bool
51751 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
51752 enum machine_mode mode)
51754 HOST_WIDE_INT offval_1, offval_2, msize;
51755 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
51757 if (load)
51759 mem_1 = operands[1];
51760 mem_2 = operands[3];
51761 reg_1 = operands[0];
51762 reg_2 = operands[2];
51764 else
51766 mem_1 = operands[0];
51767 mem_2 = operands[2];
51768 reg_1 = operands[1];
51769 reg_2 = operands[3];
51772 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
51774 if (REGNO (reg_1) != REGNO (reg_2))
51775 return false;
51777 /* Check if the addresses are in the form of [base+offset]. */
51778 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
51779 return false;
51780 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
51781 return false;
51783 /* Check if the bases are the same. */
51784 if (!rtx_equal_p (base_1, base_2))
51785 return false;
51787 offval_1 = INTVAL (offset_1);
51788 offval_2 = INTVAL (offset_2);
51789 msize = GET_MODE_SIZE (mode);
51790 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
51791 if (offval_1 + msize != offval_2)
51792 return false;
51794 return true;
51797 /* Initialize the GCC target structure. */
51798 #undef TARGET_RETURN_IN_MEMORY
51799 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51801 #undef TARGET_LEGITIMIZE_ADDRESS
51802 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51804 #undef TARGET_ATTRIBUTE_TABLE
51805 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51806 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51807 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51808 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51809 # undef TARGET_MERGE_DECL_ATTRIBUTES
51810 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51811 #endif
51813 #undef TARGET_COMP_TYPE_ATTRIBUTES
51814 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51816 #undef TARGET_INIT_BUILTINS
51817 #define TARGET_INIT_BUILTINS ix86_init_builtins
51818 #undef TARGET_BUILTIN_DECL
51819 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51820 #undef TARGET_EXPAND_BUILTIN
51821 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51823 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51824 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51825 ix86_builtin_vectorized_function
51827 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
51828 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
51830 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
51831 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
51833 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51834 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51836 #undef TARGET_BUILTIN_RECIPROCAL
51837 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
51839 #undef TARGET_ASM_FUNCTION_EPILOGUE
51840 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
51842 #undef TARGET_ENCODE_SECTION_INFO
51843 #ifndef SUBTARGET_ENCODE_SECTION_INFO
51844 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
51845 #else
51846 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
51847 #endif
51849 #undef TARGET_ASM_OPEN_PAREN
51850 #define TARGET_ASM_OPEN_PAREN ""
51851 #undef TARGET_ASM_CLOSE_PAREN
51852 #define TARGET_ASM_CLOSE_PAREN ""
51854 #undef TARGET_ASM_BYTE_OP
51855 #define TARGET_ASM_BYTE_OP ASM_BYTE
51857 #undef TARGET_ASM_ALIGNED_HI_OP
51858 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
51859 #undef TARGET_ASM_ALIGNED_SI_OP
51860 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
51861 #ifdef ASM_QUAD
51862 #undef TARGET_ASM_ALIGNED_DI_OP
51863 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
51864 #endif
51866 #undef TARGET_PROFILE_BEFORE_PROLOGUE
51867 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
51869 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
51870 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
51872 #undef TARGET_ASM_UNALIGNED_HI_OP
51873 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
51874 #undef TARGET_ASM_UNALIGNED_SI_OP
51875 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
51876 #undef TARGET_ASM_UNALIGNED_DI_OP
51877 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
51879 #undef TARGET_PRINT_OPERAND
51880 #define TARGET_PRINT_OPERAND ix86_print_operand
51881 #undef TARGET_PRINT_OPERAND_ADDRESS
51882 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
51883 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
51884 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
51885 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
51886 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
51888 #undef TARGET_SCHED_INIT_GLOBAL
51889 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
51890 #undef TARGET_SCHED_ADJUST_COST
51891 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
51892 #undef TARGET_SCHED_ISSUE_RATE
51893 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
51894 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
51895 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
51896 ia32_multipass_dfa_lookahead
51897 #undef TARGET_SCHED_MACRO_FUSION_P
51898 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
51899 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
51900 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
51902 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
51903 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
51905 #undef TARGET_MEMMODEL_CHECK
51906 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
51908 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
51909 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
51911 #ifdef HAVE_AS_TLS
51912 #undef TARGET_HAVE_TLS
51913 #define TARGET_HAVE_TLS true
51914 #endif
51915 #undef TARGET_CANNOT_FORCE_CONST_MEM
51916 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
51917 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
51918 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
51920 #undef TARGET_DELEGITIMIZE_ADDRESS
51921 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
51923 #undef TARGET_MS_BITFIELD_LAYOUT_P
51924 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
51926 #if TARGET_MACHO
51927 #undef TARGET_BINDS_LOCAL_P
51928 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
51929 #else
51930 #undef TARGET_BINDS_LOCAL_P
51931 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
51932 #endif
51933 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51934 #undef TARGET_BINDS_LOCAL_P
51935 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
51936 #endif
51938 #undef TARGET_ASM_OUTPUT_MI_THUNK
51939 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
51940 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
51941 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
51943 #undef TARGET_ASM_FILE_START
51944 #define TARGET_ASM_FILE_START x86_file_start
51946 #undef TARGET_OPTION_OVERRIDE
51947 #define TARGET_OPTION_OVERRIDE ix86_option_override
51949 #undef TARGET_REGISTER_MOVE_COST
51950 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
51951 #undef TARGET_MEMORY_MOVE_COST
51952 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
51953 #undef TARGET_RTX_COSTS
51954 #define TARGET_RTX_COSTS ix86_rtx_costs
51955 #undef TARGET_ADDRESS_COST
51956 #define TARGET_ADDRESS_COST ix86_address_cost
51958 #undef TARGET_FIXED_CONDITION_CODE_REGS
51959 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
51960 #undef TARGET_CC_MODES_COMPATIBLE
51961 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
51963 #undef TARGET_MACHINE_DEPENDENT_REORG
51964 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
51966 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
51967 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
51969 #undef TARGET_BUILD_BUILTIN_VA_LIST
51970 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
51972 #undef TARGET_FOLD_BUILTIN
51973 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
51975 #undef TARGET_COMPARE_VERSION_PRIORITY
51976 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
51978 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
51979 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
51980 ix86_generate_version_dispatcher_body
51982 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
51983 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
51984 ix86_get_function_versions_dispatcher
51986 #undef TARGET_ENUM_VA_LIST_P
51987 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
51989 #undef TARGET_FN_ABI_VA_LIST
51990 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
51992 #undef TARGET_CANONICAL_VA_LIST_TYPE
51993 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
51995 #undef TARGET_EXPAND_BUILTIN_VA_START
51996 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
51998 #undef TARGET_MD_ASM_CLOBBERS
51999 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
52001 #undef TARGET_PROMOTE_PROTOTYPES
52002 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
52003 #undef TARGET_SETUP_INCOMING_VARARGS
52004 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
52005 #undef TARGET_MUST_PASS_IN_STACK
52006 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
52007 #undef TARGET_FUNCTION_ARG_ADVANCE
52008 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
52009 #undef TARGET_FUNCTION_ARG
52010 #define TARGET_FUNCTION_ARG ix86_function_arg
52011 #undef TARGET_INIT_PIC_REG
52012 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
52013 #undef TARGET_USE_PSEUDO_PIC_REG
52014 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
52015 #undef TARGET_FUNCTION_ARG_BOUNDARY
52016 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
52017 #undef TARGET_PASS_BY_REFERENCE
52018 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
52019 #undef TARGET_INTERNAL_ARG_POINTER
52020 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
52021 #undef TARGET_UPDATE_STACK_BOUNDARY
52022 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
52023 #undef TARGET_GET_DRAP_RTX
52024 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
52025 #undef TARGET_STRICT_ARGUMENT_NAMING
52026 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
52027 #undef TARGET_STATIC_CHAIN
52028 #define TARGET_STATIC_CHAIN ix86_static_chain
52029 #undef TARGET_TRAMPOLINE_INIT
52030 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
52031 #undef TARGET_RETURN_POPS_ARGS
52032 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
52034 #undef TARGET_LEGITIMATE_COMBINED_INSN
52035 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
52037 #undef TARGET_ASAN_SHADOW_OFFSET
52038 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
52040 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
52041 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
52043 #undef TARGET_SCALAR_MODE_SUPPORTED_P
52044 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
52046 #undef TARGET_VECTOR_MODE_SUPPORTED_P
52047 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
52049 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
52050 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
52051   ix86_libgcc_floating_mode_supported_p
52053 #undef TARGET_C_MODE_FOR_SUFFIX
52054 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
52056 #ifdef HAVE_AS_TLS
52057 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
52058 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
52059 #endif
52061 #ifdef SUBTARGET_INSERT_ATTRIBUTES
52062 #undef TARGET_INSERT_ATTRIBUTES
52063 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
52064 #endif
52066 #undef TARGET_MANGLE_TYPE
52067 #define TARGET_MANGLE_TYPE ix86_mangle_type
52069 #if !TARGET_MACHO
52070 #undef TARGET_STACK_PROTECT_FAIL
52071 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
52072 #endif
52074 #undef TARGET_FUNCTION_VALUE
52075 #define TARGET_FUNCTION_VALUE ix86_function_value
52077 #undef TARGET_FUNCTION_VALUE_REGNO_P
52078 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
52080 #undef TARGET_PROMOTE_FUNCTION_MODE
52081 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
52083 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
52084 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
52086 #undef TARGET_MEMBER_TYPE_FORCES_BLK
52087 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
52089 #undef TARGET_INSTANTIATE_DECLS
52090 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
52092 #undef TARGET_SECONDARY_RELOAD
52093 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
52095 #undef TARGET_CLASS_MAX_NREGS
52096 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
52098 #undef TARGET_PREFERRED_RELOAD_CLASS
52099 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
52100 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
52101 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
52102 #undef TARGET_CLASS_LIKELY_SPILLED_P
52103 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
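/* Auto-vectorizer cost-model and SIMD configuration hooks.  */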
52105 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
52106 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
52107   ix86_builtin_vectorization_cost
52108 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
52109 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
52110   ix86_vectorize_vec_perm_const_ok
52111 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
52112 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
52113   ix86_preferred_simd_mode
52114 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
52115 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
52116   ix86_autovectorize_vector_sizes
52117 #undef TARGET_VECTORIZE_INIT_COST
52118 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
52119 #undef TARGET_VECTORIZE_ADD_STMT_COST
52120 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
52121 #undef TARGET_VECTORIZE_FINISH_COST
52122 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
52123 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
52124 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
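/* Per-function "target" attribute and option save/restore hooks.  */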
52126 #undef TARGET_SET_CURRENT_FUNCTION
52127 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
52129 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
52130 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
52132 #undef TARGET_OPTION_SAVE
52133 #define TARGET_OPTION_SAVE ix86_function_specific_save
52135 #undef TARGET_OPTION_RESTORE
52136 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
52138 #undef TARGET_OPTION_POST_STREAM_IN
52139 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
52141 #undef TARGET_OPTION_PRINT
52142 #define TARGET_OPTION_PRINT ix86_function_specific_print
52144 #undef TARGET_OPTION_FUNCTION_VERSIONS
52145 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
52147 #undef TARGET_CAN_INLINE_P
52148 #define TARGET_CAN_INLINE_P ix86_can_inline_p
52150 #undef TARGET_EXPAND_TO_RTL_HOOK
52151 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
52153 #undef TARGET_LEGITIMATE_ADDRESS_P
52154 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
52156 #undef TARGET_LRA_P
52157 #define TARGET_LRA_P hook_bool_void_true
52159 #undef TARGET_REGISTER_PRIORITY
52160 #define TARGET_REGISTER_PRIORITY ix86_register_priority
52162 #undef TARGET_REGISTER_USAGE_LEVELING_P
52163 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
52165 #undef TARGET_LEGITIMATE_CONSTANT_P
52166 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
52168 #undef TARGET_FRAME_POINTER_REQUIRED
52169 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
52171 #undef TARGET_CAN_ELIMINATE
52172 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
52174 #undef TARGET_EXTRA_LIVE_ON_ENTRY
52175 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
52177 #undef TARGET_ASM_CODE_END
52178 #define TARGET_ASM_CODE_END ix86_code_end
52180 #undef TARGET_CONDITIONAL_REGISTER_USAGE
52181 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
52183 #if TARGET_MACHO
52184 #undef TARGET_INIT_LIBFUNCS
52185 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
52186 #endif
52188 #undef TARGET_LOOP_UNROLL_ADJUST
52189 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
52191 #undef TARGET_SPILL_CLASS
52192 #define TARGET_SPILL_CLASS ix86_spill_class
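/* SIMD clone ("omp declare simd") hooks.  */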
52194 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
52195 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
52196   ix86_simd_clone_compute_vecsize_and_simdlen
52198 #undef TARGET_SIMD_CLONE_ADJUST
52199 #define TARGET_SIMD_CLONE_ADJUST \
52200   ix86_simd_clone_adjust
52202 #undef TARGET_SIMD_CLONE_USABLE
52203 #define TARGET_SIMD_CLONE_USABLE \
52204   ix86_simd_clone_usable
52206 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
52207 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
52208   ix86_float_exceptions_rounding_supported_p
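/* Mode-switching hooks, used by the optimize-mode-switching pass.  */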
52210 #undef TARGET_MODE_EMIT
52211 #define TARGET_MODE_EMIT ix86_emit_mode_set
52213 #undef TARGET_MODE_NEEDED
52214 #define TARGET_MODE_NEEDED ix86_mode_needed
52216 #undef TARGET_MODE_AFTER
52217 #define TARGET_MODE_AFTER ix86_mode_after
52219 #undef TARGET_MODE_ENTRY
52220 #define TARGET_MODE_ENTRY ix86_mode_entry
52222 #undef TARGET_MODE_EXIT
52223 #define TARGET_MODE_EXIT ix86_mode_exit
52225 #undef TARGET_MODE_PRIORITY
52226 #define TARGET_MODE_PRIORITY ix86_mode_priority
52228 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
52229 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
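/* Pointer Bounds Checker (Intel MPX) hooks.  */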
52231 #undef TARGET_LOAD_BOUNDS_FOR_ARG
52232 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
52234 #undef TARGET_STORE_BOUNDS_FOR_ARG
52235 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
52237 #undef TARGET_LOAD_RETURNED_BOUNDS
52238 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
52240 #undef TARGET_STORE_RETURNED_BOUNDS
52241 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
52243 #undef TARGET_CHKP_BOUND_MODE
52244 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
52246 #undef TARGET_BUILTIN_CHKP_FUNCTION
52247 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
52249 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
52250 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
52252 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
52253 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
52255 #undef TARGET_CHKP_INITIALIZE_BOUNDS
52256 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
52258 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
52259 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
52261 #undef TARGET_OFFLOAD_OPTIONS
52262 #define TARGET_OFFLOAD_OPTIONS \
52263   ix86_offload_options
52265 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
52266 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
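/* Build the target vector: TARGET_INITIALIZER picks up the overrides
   above and fills every remaining hook with its default from
   target-def.h.  */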
52268 struct gcc_target targetm = TARGET_INITIALIZER;
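/* Garbage-collector roots generated by gengtype for this file.  */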
52270 #include "gt-i386.h"