1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "hash-set.h"
26 #include "machmode.h"
27 #include "vec.h"
28 #include "double-int.h"
29 #include "input.h"
30 #include "alias.h"
31 #include "symtab.h"
32 #include "wide-int.h"
33 #include "inchash.h"
34 #include "tree.h"
35 #include "fold-const.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "calls.h"
39 #include "stor-layout.h"
40 #include "varasm.h"
41 #include "tm_p.h"
42 #include "regs.h"
43 #include "hard-reg-set.h"
44 #include "insn-config.h"
45 #include "conditions.h"
46 #include "output.h"
47 #include "insn-codes.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "except.h"
51 #include "function.h"
52 #include "recog.h"
53 #include "hashtab.h"
54 #include "statistics.h"
55 #include "real.h"
56 #include "fixed-value.h"
57 #include "expmed.h"
58 #include "dojump.h"
59 #include "explow.h"
60 #include "emit-rtl.h"
61 #include "stmt.h"
62 #include "expr.h"
63 #include "optabs.h"
64 #include "diagnostic-core.h"
65 #include "toplev.h"
66 #include "predict.h"
67 #include "dominance.h"
68 #include "cfg.h"
69 #include "cfgrtl.h"
70 #include "cfganal.h"
71 #include "lcm.h"
72 #include "cfgbuild.h"
73 #include "cfgcleanup.h"
74 #include "basic-block.h"
75 #include "ggc.h"
76 #include "target.h"
77 #include "target-def.h"
78 #include "common/common-target.h"
79 #include "langhooks.h"
80 #include "reload.h"
81 #include "hash-map.h"
82 #include "is-a.h"
83 #include "plugin-api.h"
84 #include "ipa-ref.h"
85 #include "cgraph.h"
86 #include "hash-table.h"
87 #include "tree-ssa-alias.h"
88 #include "internal-fn.h"
89 #include "gimple-fold.h"
90 #include "tree-eh.h"
91 #include "gimple-expr.h"
92 #include "gimple.h"
93 #include "gimplify.h"
94 #include "cfgloop.h"
95 #include "dwarf2.h"
96 #include "df.h"
97 #include "tm-constrs.h"
98 #include "params.h"
99 #include "cselib.h"
100 #include "debug.h"
101 #include "sched-int.h"
102 #include "sbitmap.h"
103 #include "fibheap.h"
104 #include "opts.h"
105 #include "diagnostic.h"
106 #include "dumpfile.h"
107 #include "tree-pass.h"
108 #include "context.h"
109 #include "pass_manager.h"
110 #include "target-globals.h"
111 #include "tree-vectorizer.h"
112 #include "shrink-wrap.h"
113 #include "builtins.h"
114 #include "rtl-iter.h"
115 #include "tree-iterator.h"
116 #include "tree-chkp.h"
117 #include "rtl-chkp.h"
119 static rtx legitimize_dllimport_symbol (rtx, bool);
120 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
121 static rtx legitimize_pe_coff_symbol (rtx, bool);
123 #ifndef CHECK_STACK_LIMIT
124 #define CHECK_STACK_LIMIT (-1)
125 #endif
127 /* Return index of given mode in mult and division cost tables. */
128 #define MODE_INDEX(mode) \
129 ((mode) == QImode ? 0 \
130 : (mode) == HImode ? 1 \
131 : (mode) == SImode ? 2 \
132 : (mode) == DImode ? 3 \
133 : 4)
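/* Illustrative sketch (editorial addition, not part of the original file):
   how MODE_INDEX is typically used to pick the per-mode entry out of the
   multiply/divide cost arrays in the processor_costs tables below.  The
   helper name and the field name (mult_init) are assumptions made for
   illustration; see the struct definition in i386.h for the real layout.  */
#if 0
static int
example_mult_init_cost (const struct processor_costs *cost, machine_mode mode)
{
  /* Indices 0..3 cover QImode..DImode; every other mode maps to the
     "other" slot 4, exactly as MODE_INDEX above computes.  */
  return cost->mult_init[MODE_INDEX (mode)];
}
#endif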
135 /* Processor costs (relative to an add) */
136 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
137 #define COSTS_N_BYTES(N) ((N) * 2)
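/* Editorial note: with COSTS_N_INSNS (N) == (N) * 4 and a 2-byte add,
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so on this size-tuning scale
   a 2-byte instruction costs exactly one "add" and each additional byte
   adds half an add.  */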
139 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
141 static stringop_algs ix86_size_memcpy[2] = {
142 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
143 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
144 static stringop_algs ix86_size_memset[2] = {
145 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
146 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
148 const
149 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
150 COSTS_N_BYTES (2), /* cost of an add instruction */
151 COSTS_N_BYTES (3), /* cost of a lea instruction */
152 COSTS_N_BYTES (2), /* variable shift costs */
153 COSTS_N_BYTES (3), /* constant shift costs */
154 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
155 COSTS_N_BYTES (3), /* HI */
156 COSTS_N_BYTES (3), /* SI */
157 COSTS_N_BYTES (3), /* DI */
158 COSTS_N_BYTES (5)}, /* other */
159 0, /* cost of multiply per each bit set */
160 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
161 COSTS_N_BYTES (3), /* HI */
162 COSTS_N_BYTES (3), /* SI */
163 COSTS_N_BYTES (3), /* DI */
164 COSTS_N_BYTES (5)}, /* other */
165 COSTS_N_BYTES (3), /* cost of movsx */
166 COSTS_N_BYTES (3), /* cost of movzx */
167 0, /* "large" insn */
168 2, /* MOVE_RATIO */
169 2, /* cost for loading QImode using movzbl */
170 {2, 2, 2}, /* cost of loading integer registers
171 in QImode, HImode and SImode.
172 Relative to reg-reg move (2). */
173 {2, 2, 2}, /* cost of storing integer registers */
174 2, /* cost of reg,reg fld/fst */
175 {2, 2, 2}, /* cost of loading fp registers
176 in SFmode, DFmode and XFmode */
177 {2, 2, 2}, /* cost of storing fp registers
178 in SFmode, DFmode and XFmode */
179 3, /* cost of moving MMX register */
180 {3, 3}, /* cost of loading MMX registers
181 in SImode and DImode */
182 {3, 3}, /* cost of storing MMX registers
183 in SImode and DImode */
184 3, /* cost of moving SSE register */
185 {3, 3, 3}, /* cost of loading SSE registers
186 in SImode, DImode and TImode */
187 {3, 3, 3}, /* cost of storing SSE registers
188 in SImode, DImode and TImode */
189 3, /* MMX or SSE register to integer */
190 0, /* size of l1 cache */
191 0, /* size of l2 cache */
192 0, /* size of prefetch block */
193 0, /* number of parallel prefetches */
194 2, /* Branch cost */
195 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
196 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
197 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
198 COSTS_N_BYTES (2), /* cost of FABS instruction. */
199 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
200 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
201 ix86_size_memcpy,
202 ix86_size_memset,
203 1, /* scalar_stmt_cost. */
204 1, /* scalar load_cost. */
205 1, /* scalar_store_cost. */
206 1, /* vec_stmt_cost. */
207 1, /* vec_to_scalar_cost. */
208 1, /* scalar_to_vec_cost. */
209 1, /* vec_align_load_cost. */
210 1, /* vec_unalign_load_cost. */
211 1, /* vec_store_cost. */
212 1, /* cond_taken_branch_cost. */
213 1, /* cond_not_taken_branch_cost. */
214 };
216 /* Processor costs (relative to an add) */
217 static stringop_algs i386_memcpy[2] = {
218 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
219 DUMMY_STRINGOP_ALGS};
220 static stringop_algs i386_memset[2] = {
221 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
222 DUMMY_STRINGOP_ALGS};
224 static const
225 struct processor_costs i386_cost = { /* 386 specific costs */
226 COSTS_N_INSNS (1), /* cost of an add instruction */
227 COSTS_N_INSNS (1), /* cost of a lea instruction */
228 COSTS_N_INSNS (3), /* variable shift costs */
229 COSTS_N_INSNS (2), /* constant shift costs */
230 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
231 COSTS_N_INSNS (6), /* HI */
232 COSTS_N_INSNS (6), /* SI */
233 COSTS_N_INSNS (6), /* DI */
234 COSTS_N_INSNS (6)}, /* other */
235 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
236 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
237 COSTS_N_INSNS (23), /* HI */
238 COSTS_N_INSNS (23), /* SI */
239 COSTS_N_INSNS (23), /* DI */
240 COSTS_N_INSNS (23)}, /* other */
241 COSTS_N_INSNS (3), /* cost of movsx */
242 COSTS_N_INSNS (2), /* cost of movzx */
243 15, /* "large" insn */
244 3, /* MOVE_RATIO */
245 4, /* cost for loading QImode using movzbl */
246 {2, 4, 2}, /* cost of loading integer registers
247 in QImode, HImode and SImode.
248 Relative to reg-reg move (2). */
249 {2, 4, 2}, /* cost of storing integer registers */
250 2, /* cost of reg,reg fld/fst */
251 {8, 8, 8}, /* cost of loading fp registers
252 in SFmode, DFmode and XFmode */
253 {8, 8, 8}, /* cost of storing fp registers
254 in SFmode, DFmode and XFmode */
255 2, /* cost of moving MMX register */
256 {4, 8}, /* cost of loading MMX registers
257 in SImode and DImode */
258 {4, 8}, /* cost of storing MMX registers
259 in SImode and DImode */
260 2, /* cost of moving SSE register */
261 {4, 8, 16}, /* cost of loading SSE registers
262 in SImode, DImode and TImode */
263 {4, 8, 16}, /* cost of storing SSE registers
264 in SImode, DImode and TImode */
265 3, /* MMX or SSE register to integer */
266 0, /* size of l1 cache */
267 0, /* size of l2 cache */
268 0, /* size of prefetch block */
269 0, /* number of parallel prefetches */
270 1, /* Branch cost */
271 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
272 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
273 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
274 COSTS_N_INSNS (22), /* cost of FABS instruction. */
275 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
276 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
277 i386_memcpy,
278 i386_memset,
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
290 };
292 static stringop_algs i486_memcpy[2] = {
293 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
294 DUMMY_STRINGOP_ALGS};
295 static stringop_algs i486_memset[2] = {
296 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
297 DUMMY_STRINGOP_ALGS};
299 static const
300 struct processor_costs i486_cost = { /* 486 specific costs */
301 COSTS_N_INSNS (1), /* cost of an add instruction */
302 COSTS_N_INSNS (1), /* cost of a lea instruction */
303 COSTS_N_INSNS (3), /* variable shift costs */
304 COSTS_N_INSNS (2), /* constant shift costs */
305 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
306 COSTS_N_INSNS (12), /* HI */
307 COSTS_N_INSNS (12), /* SI */
308 COSTS_N_INSNS (12), /* DI */
309 COSTS_N_INSNS (12)}, /* other */
310 1, /* cost of multiply per each bit set */
311 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
312 COSTS_N_INSNS (40), /* HI */
313 COSTS_N_INSNS (40), /* SI */
314 COSTS_N_INSNS (40), /* DI */
315 COSTS_N_INSNS (40)}, /* other */
316 COSTS_N_INSNS (3), /* cost of movsx */
317 COSTS_N_INSNS (2), /* cost of movzx */
318 15, /* "large" insn */
319 3, /* MOVE_RATIO */
320 4, /* cost for loading QImode using movzbl */
321 {2, 4, 2}, /* cost of loading integer registers
322 in QImode, HImode and SImode.
323 Relative to reg-reg move (2). */
324 {2, 4, 2}, /* cost of storing integer registers */
325 2, /* cost of reg,reg fld/fst */
326 {8, 8, 8}, /* cost of loading fp registers
327 in SFmode, DFmode and XFmode */
328 {8, 8, 8}, /* cost of storing fp registers
329 in SFmode, DFmode and XFmode */
330 2, /* cost of moving MMX register */
331 {4, 8}, /* cost of loading MMX registers
332 in SImode and DImode */
333 {4, 8}, /* cost of storing MMX registers
334 in SImode and DImode */
335 2, /* cost of moving SSE register */
336 {4, 8, 16}, /* cost of loading SSE registers
337 in SImode, DImode and TImode */
338 {4, 8, 16}, /* cost of storing SSE registers
339 in SImode, DImode and TImode */
340 3, /* MMX or SSE register to integer */
341 4, /* size of l1 cache. 486 has 8kB cache
342 shared for code and data, so 4kB is
343 not really precise. */
344 4, /* size of l2 cache */
345 0, /* size of prefetch block */
346 0, /* number of parallel prefetches */
347 1, /* Branch cost */
348 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
349 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
350 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
351 COSTS_N_INSNS (3), /* cost of FABS instruction. */
352 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
353 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
354 i486_memcpy,
355 i486_memset,
356 1, /* scalar_stmt_cost. */
357 1, /* scalar load_cost. */
358 1, /* scalar_store_cost. */
359 1, /* vec_stmt_cost. */
360 1, /* vec_to_scalar_cost. */
361 1, /* scalar_to_vec_cost. */
362 1, /* vec_align_load_cost. */
363 2, /* vec_unalign_load_cost. */
364 1, /* vec_store_cost. */
365 3, /* cond_taken_branch_cost. */
366 1, /* cond_not_taken_branch_cost. */
367 };
369 static stringop_algs pentium_memcpy[2] = {
370 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
371 DUMMY_STRINGOP_ALGS};
372 static stringop_algs pentium_memset[2] = {
373 {libcall, {{-1, rep_prefix_4_byte, false}}},
374 DUMMY_STRINGOP_ALGS};
376 static const
377 struct processor_costs pentium_cost = {
378 COSTS_N_INSNS (1), /* cost of an add instruction */
379 COSTS_N_INSNS (1), /* cost of a lea instruction */
380 COSTS_N_INSNS (4), /* variable shift costs */
381 COSTS_N_INSNS (1), /* constant shift costs */
382 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
383 COSTS_N_INSNS (11), /* HI */
384 COSTS_N_INSNS (11), /* SI */
385 COSTS_N_INSNS (11), /* DI */
386 COSTS_N_INSNS (11)}, /* other */
387 0, /* cost of multiply per each bit set */
388 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
389 COSTS_N_INSNS (25), /* HI */
390 COSTS_N_INSNS (25), /* SI */
391 COSTS_N_INSNS (25), /* DI */
392 COSTS_N_INSNS (25)}, /* other */
393 COSTS_N_INSNS (3), /* cost of movsx */
394 COSTS_N_INSNS (2), /* cost of movzx */
395 8, /* "large" insn */
396 6, /* MOVE_RATIO */
397 6, /* cost for loading QImode using movzbl */
398 {2, 4, 2}, /* cost of loading integer registers
399 in QImode, HImode and SImode.
400 Relative to reg-reg move (2). */
401 {2, 4, 2}, /* cost of storing integer registers */
402 2, /* cost of reg,reg fld/fst */
403 {2, 2, 6}, /* cost of loading fp registers
404 in SFmode, DFmode and XFmode */
405 {4, 4, 6}, /* cost of storing fp registers
406 in SFmode, DFmode and XFmode */
407 8, /* cost of moving MMX register */
408 {8, 8}, /* cost of loading MMX registers
409 in SImode and DImode */
410 {8, 8}, /* cost of storing MMX registers
411 in SImode and DImode */
412 2, /* cost of moving SSE register */
413 {4, 8, 16}, /* cost of loading SSE registers
414 in SImode, DImode and TImode */
415 {4, 8, 16}, /* cost of storing SSE registers
416 in SImode, DImode and TImode */
417 3, /* MMX or SSE register to integer */
418 8, /* size of l1 cache. */
419 8, /* size of l2 cache */
420 0, /* size of prefetch block */
421 0, /* number of parallel prefetches */
422 2, /* Branch cost */
423 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
424 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
425 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
426 COSTS_N_INSNS (1), /* cost of FABS instruction. */
427 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
428 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
429 pentium_memcpy,
430 pentium_memset,
431 1, /* scalar_stmt_cost. */
432 1, /* scalar load_cost. */
433 1, /* scalar_store_cost. */
434 1, /* vec_stmt_cost. */
435 1, /* vec_to_scalar_cost. */
436 1, /* scalar_to_vec_cost. */
437 1, /* vec_align_load_cost. */
438 2, /* vec_unalign_load_cost. */
439 1, /* vec_store_cost. */
440 3, /* cond_taken_branch_cost. */
441 1, /* cond_not_taken_branch_cost. */
442 };
444 /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes
445 (we ensure the alignment). For small blocks an inline loop is still a
446 noticeable win; for bigger blocks either rep movsl or rep movsb is the way
447 to go. Rep movsb apparently has a more expensive startup time in the CPU,
448 but past 4K the difference is down in the noise. (See the sketch after these tables.) */
449 static stringop_algs pentiumpro_memcpy[2] = {
450 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
451 {8192, rep_prefix_4_byte, false},
452 {-1, rep_prefix_1_byte, false}}},
453 DUMMY_STRINGOP_ALGS};
454 static stringop_algs pentiumpro_memset[2] = {
455 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
456 {8192, rep_prefix_4_byte, false},
457 {-1, libcall, false}}},
458 DUMMY_STRINGOP_ALGS};
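/* Illustrative sketch (editorial addition, not part of the original file):
   roughly how a stringop_algs table such as pentiumpro_memcpy above is
   consulted.  Each {max, alg, noalign} entry applies to block sizes up to
   "max" bytes, with max == -1 meaning "all larger sizes", and the leading
   algorithm is the fallback when the block size is not known at compile
   time.  This is a simplified stand-in for the real selection logic
   (cf. decide_alg later in this file); the field and macro names below
   mirror the initializers and are assumptions made for illustration.  */
#if 0
static enum stringop_alg
example_pick_stringop_alg (const struct stringop_algs *algs,
			   HOST_WIDE_INT count, bool count_known)
{
  unsigned int i;

  if (!count_known)
    return algs->unknown_size;
  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    {
      /* max == -1 terminates the table and catches all larger sizes.  */
      if (algs->size[i].max == -1 || count <= algs->size[i].max)
	return algs->size[i].alg;
    }
  return libcall;
}
#endif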
459 static const
460 struct processor_costs pentiumpro_cost = {
461 COSTS_N_INSNS (1), /* cost of an add instruction */
462 COSTS_N_INSNS (1), /* cost of a lea instruction */
463 COSTS_N_INSNS (1), /* variable shift costs */
464 COSTS_N_INSNS (1), /* constant shift costs */
465 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
466 COSTS_N_INSNS (4), /* HI */
467 COSTS_N_INSNS (4), /* SI */
468 COSTS_N_INSNS (4), /* DI */
469 COSTS_N_INSNS (4)}, /* other */
470 0, /* cost of multiply per each bit set */
471 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
472 COSTS_N_INSNS (17), /* HI */
473 COSTS_N_INSNS (17), /* SI */
474 COSTS_N_INSNS (17), /* DI */
475 COSTS_N_INSNS (17)}, /* other */
476 COSTS_N_INSNS (1), /* cost of movsx */
477 COSTS_N_INSNS (1), /* cost of movzx */
478 8, /* "large" insn */
479 6, /* MOVE_RATIO */
480 2, /* cost for loading QImode using movzbl */
481 {4, 4, 4}, /* cost of loading integer registers
482 in QImode, HImode and SImode.
483 Relative to reg-reg move (2). */
484 {2, 2, 2}, /* cost of storing integer registers */
485 2, /* cost of reg,reg fld/fst */
486 {2, 2, 6}, /* cost of loading fp registers
487 in SFmode, DFmode and XFmode */
488 {4, 4, 6}, /* cost of storing fp registers
489 in SFmode, DFmode and XFmode */
490 2, /* cost of moving MMX register */
491 {2, 2}, /* cost of loading MMX registers
492 in SImode and DImode */
493 {2, 2}, /* cost of storing MMX registers
494 in SImode and DImode */
495 2, /* cost of moving SSE register */
496 {2, 2, 8}, /* cost of loading SSE registers
497 in SImode, DImode and TImode */
498 {2, 2, 8}, /* cost of storing SSE registers
499 in SImode, DImode and TImode */
500 3, /* MMX or SSE register to integer */
501 8, /* size of l1 cache. */
502 256, /* size of l2 cache */
503 32, /* size of prefetch block */
504 6, /* number of parallel prefetches */
505 2, /* Branch cost */
506 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
507 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
508 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
509 COSTS_N_INSNS (2), /* cost of FABS instruction. */
510 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
511 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
512 pentiumpro_memcpy,
513 pentiumpro_memset,
514 1, /* scalar_stmt_cost. */
515 1, /* scalar load_cost. */
516 1, /* scalar_store_cost. */
517 1, /* vec_stmt_cost. */
518 1, /* vec_to_scalar_cost. */
519 1, /* scalar_to_vec_cost. */
520 1, /* vec_align_load_cost. */
521 2, /* vec_unalign_load_cost. */
522 1, /* vec_store_cost. */
523 3, /* cond_taken_branch_cost. */
524 1, /* cond_not_taken_branch_cost. */
525 };
527 static stringop_algs geode_memcpy[2] = {
528 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
529 DUMMY_STRINGOP_ALGS};
530 static stringop_algs geode_memset[2] = {
531 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
532 DUMMY_STRINGOP_ALGS};
533 static const
534 struct processor_costs geode_cost = {
535 COSTS_N_INSNS (1), /* cost of an add instruction */
536 COSTS_N_INSNS (1), /* cost of a lea instruction */
537 COSTS_N_INSNS (2), /* variable shift costs */
538 COSTS_N_INSNS (1), /* constant shift costs */
539 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
540 COSTS_N_INSNS (4), /* HI */
541 COSTS_N_INSNS (7), /* SI */
542 COSTS_N_INSNS (7), /* DI */
543 COSTS_N_INSNS (7)}, /* other */
544 0, /* cost of multiply per each bit set */
545 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
546 COSTS_N_INSNS (23), /* HI */
547 COSTS_N_INSNS (39), /* SI */
548 COSTS_N_INSNS (39), /* DI */
549 COSTS_N_INSNS (39)}, /* other */
550 COSTS_N_INSNS (1), /* cost of movsx */
551 COSTS_N_INSNS (1), /* cost of movzx */
552 8, /* "large" insn */
553 4, /* MOVE_RATIO */
554 1, /* cost for loading QImode using movzbl */
555 {1, 1, 1}, /* cost of loading integer registers
556 in QImode, HImode and SImode.
557 Relative to reg-reg move (2). */
558 {1, 1, 1}, /* cost of storing integer registers */
559 1, /* cost of reg,reg fld/fst */
560 {1, 1, 1}, /* cost of loading fp registers
561 in SFmode, DFmode and XFmode */
562 {4, 6, 6}, /* cost of storing fp registers
563 in SFmode, DFmode and XFmode */
565 1, /* cost of moving MMX register */
566 {1, 1}, /* cost of loading MMX registers
567 in SImode and DImode */
568 {1, 1}, /* cost of storing MMX registers
569 in SImode and DImode */
570 1, /* cost of moving SSE register */
571 {1, 1, 1}, /* cost of loading SSE registers
572 in SImode, DImode and TImode */
573 {1, 1, 1}, /* cost of storing SSE registers
574 in SImode, DImode and TImode */
575 1, /* MMX or SSE register to integer */
576 64, /* size of l1 cache. */
577 128, /* size of l2 cache. */
578 32, /* size of prefetch block */
579 1, /* number of parallel prefetches */
580 1, /* Branch cost */
581 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
582 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
583 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
584 COSTS_N_INSNS (1), /* cost of FABS instruction. */
585 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
586 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
587 geode_memcpy,
588 geode_memset,
589 1, /* scalar_stmt_cost. */
590 1, /* scalar load_cost. */
591 1, /* scalar_store_cost. */
592 1, /* vec_stmt_cost. */
593 1, /* vec_to_scalar_cost. */
594 1, /* scalar_to_vec_cost. */
595 1, /* vec_align_load_cost. */
596 2, /* vec_unalign_load_cost. */
597 1, /* vec_store_cost. */
598 3, /* cond_taken_branch_cost. */
599 1, /* cond_not_taken_branch_cost. */
600 };
602 static stringop_algs k6_memcpy[2] = {
603 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
604 DUMMY_STRINGOP_ALGS};
605 static stringop_algs k6_memset[2] = {
606 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
607 DUMMY_STRINGOP_ALGS};
608 static const
609 struct processor_costs k6_cost = {
610 COSTS_N_INSNS (1), /* cost of an add instruction */
611 COSTS_N_INSNS (2), /* cost of a lea instruction */
612 COSTS_N_INSNS (1), /* variable shift costs */
613 COSTS_N_INSNS (1), /* constant shift costs */
614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
615 COSTS_N_INSNS (3), /* HI */
616 COSTS_N_INSNS (3), /* SI */
617 COSTS_N_INSNS (3), /* DI */
618 COSTS_N_INSNS (3)}, /* other */
619 0, /* cost of multiply per each bit set */
620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
621 COSTS_N_INSNS (18), /* HI */
622 COSTS_N_INSNS (18), /* SI */
623 COSTS_N_INSNS (18), /* DI */
624 COSTS_N_INSNS (18)}, /* other */
625 COSTS_N_INSNS (2), /* cost of movsx */
626 COSTS_N_INSNS (2), /* cost of movzx */
627 8, /* "large" insn */
628 4, /* MOVE_RATIO */
629 3, /* cost for loading QImode using movzbl */
630 {4, 5, 4}, /* cost of loading integer registers
631 in QImode, HImode and SImode.
632 Relative to reg-reg move (2). */
633 {2, 3, 2}, /* cost of storing integer registers */
634 4, /* cost of reg,reg fld/fst */
635 {6, 6, 6}, /* cost of loading fp registers
636 in SFmode, DFmode and XFmode */
637 {4, 4, 4}, /* cost of storing fp registers
638 in SFmode, DFmode and XFmode */
639 2, /* cost of moving MMX register */
640 {2, 2}, /* cost of loading MMX registers
641 in SImode and DImode */
642 {2, 2}, /* cost of storing MMX registers
643 in SImode and DImode */
644 2, /* cost of moving SSE register */
645 {2, 2, 8}, /* cost of loading SSE registers
646 in SImode, DImode and TImode */
647 {2, 2, 8}, /* cost of storing SSE registers
648 in SImode, DImode and TImode */
649 6, /* MMX or SSE register to integer */
650 32, /* size of l1 cache. */
651 32, /* size of l2 cache. Some models
652 have integrated l2 cache, but
653 optimizing for k6 is not important
654 enough to worry about that. */
655 32, /* size of prefetch block */
656 1, /* number of parallel prefetches */
657 1, /* Branch cost */
658 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
659 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
660 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
661 COSTS_N_INSNS (2), /* cost of FABS instruction. */
662 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
663 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
664 k6_memcpy,
665 k6_memset,
666 1, /* scalar_stmt_cost. */
667 1, /* scalar load_cost. */
668 1, /* scalar_store_cost. */
669 1, /* vec_stmt_cost. */
670 1, /* vec_to_scalar_cost. */
671 1, /* scalar_to_vec_cost. */
672 1, /* vec_align_load_cost. */
673 2, /* vec_unalign_load_cost. */
674 1, /* vec_store_cost. */
675 3, /* cond_taken_branch_cost. */
676 1, /* cond_not_taken_branch_cost. */
677 };
679 /* For some reason, Athlon deals better with the REP prefix (relative to
680 loops) than K8 does. Alignment becomes important after 8 bytes for memcpy
681 and 128 bytes for memset. */
682 static stringop_algs athlon_memcpy[2] = {
683 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
684 DUMMY_STRINGOP_ALGS};
685 static stringop_algs athlon_memset[2] = {
686 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
687 DUMMY_STRINGOP_ALGS};
688 static const
689 struct processor_costs athlon_cost = {
690 COSTS_N_INSNS (1), /* cost of an add instruction */
691 COSTS_N_INSNS (2), /* cost of a lea instruction */
692 COSTS_N_INSNS (1), /* variable shift costs */
693 COSTS_N_INSNS (1), /* constant shift costs */
694 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
695 COSTS_N_INSNS (5), /* HI */
696 COSTS_N_INSNS (5), /* SI */
697 COSTS_N_INSNS (5), /* DI */
698 COSTS_N_INSNS (5)}, /* other */
699 0, /* cost of multiply per each bit set */
700 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
701 COSTS_N_INSNS (26), /* HI */
702 COSTS_N_INSNS (42), /* SI */
703 COSTS_N_INSNS (74), /* DI */
704 COSTS_N_INSNS (74)}, /* other */
705 COSTS_N_INSNS (1), /* cost of movsx */
706 COSTS_N_INSNS (1), /* cost of movzx */
707 8, /* "large" insn */
708 9, /* MOVE_RATIO */
709 4, /* cost for loading QImode using movzbl */
710 {3, 4, 3}, /* cost of loading integer registers
711 in QImode, HImode and SImode.
712 Relative to reg-reg move (2). */
713 {3, 4, 3}, /* cost of storing integer registers */
714 4, /* cost of reg,reg fld/fst */
715 {4, 4, 12}, /* cost of loading fp registers
716 in SFmode, DFmode and XFmode */
717 {6, 6, 8}, /* cost of storing fp registers
718 in SFmode, DFmode and XFmode */
719 2, /* cost of moving MMX register */
720 {4, 4}, /* cost of loading MMX registers
721 in SImode and DImode */
722 {4, 4}, /* cost of storing MMX registers
723 in SImode and DImode */
724 2, /* cost of moving SSE register */
725 {4, 4, 6}, /* cost of loading SSE registers
726 in SImode, DImode and TImode */
727 {4, 4, 5}, /* cost of storing SSE registers
728 in SImode, DImode and TImode */
729 5, /* MMX or SSE register to integer */
730 64, /* size of l1 cache. */
731 256, /* size of l2 cache. */
732 64, /* size of prefetch block */
733 6, /* number of parallel prefetches */
734 5, /* Branch cost */
735 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
736 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
737 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
738 COSTS_N_INSNS (2), /* cost of FABS instruction. */
739 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
740 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
741 athlon_memcpy,
742 athlon_memset,
743 1, /* scalar_stmt_cost. */
744 1, /* scalar load_cost. */
745 1, /* scalar_store_cost. */
746 1, /* vec_stmt_cost. */
747 1, /* vec_to_scalar_cost. */
748 1, /* scalar_to_vec_cost. */
749 1, /* vec_align_load_cost. */
750 2, /* vec_unalign_load_cost. */
751 1, /* vec_store_cost. */
752 3, /* cond_taken_branch_cost. */
753 1, /* cond_not_taken_branch_cost. */
754 };
756 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
757 small blocks it is better to use a loop. For large blocks, a libcall can
758 do non-temporal accesses and beat inline code considerably. */
759 static stringop_algs k8_memcpy[2] = {
760 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
761 {-1, rep_prefix_4_byte, false}}},
762 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
763 {-1, libcall, false}}}};
764 static stringop_algs k8_memset[2] = {
765 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
766 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
767 {libcall, {{48, unrolled_loop, false},
768 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
769 static const
770 struct processor_costs k8_cost = {
771 COSTS_N_INSNS (1), /* cost of an add instruction */
772 COSTS_N_INSNS (2), /* cost of a lea instruction */
773 COSTS_N_INSNS (1), /* variable shift costs */
774 COSTS_N_INSNS (1), /* constant shift costs */
775 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
776 COSTS_N_INSNS (4), /* HI */
777 COSTS_N_INSNS (3), /* SI */
778 COSTS_N_INSNS (4), /* DI */
779 COSTS_N_INSNS (5)}, /* other */
780 0, /* cost of multiply per each bit set */
781 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
782 COSTS_N_INSNS (26), /* HI */
783 COSTS_N_INSNS (42), /* SI */
784 COSTS_N_INSNS (74), /* DI */
785 COSTS_N_INSNS (74)}, /* other */
786 COSTS_N_INSNS (1), /* cost of movsx */
787 COSTS_N_INSNS (1), /* cost of movzx */
788 8, /* "large" insn */
789 9, /* MOVE_RATIO */
790 4, /* cost for loading QImode using movzbl */
791 {3, 4, 3}, /* cost of loading integer registers
792 in QImode, HImode and SImode.
793 Relative to reg-reg move (2). */
794 {3, 4, 3}, /* cost of storing integer registers */
795 4, /* cost of reg,reg fld/fst */
796 {4, 4, 12}, /* cost of loading fp registers
797 in SFmode, DFmode and XFmode */
798 {6, 6, 8}, /* cost of storing fp registers
799 in SFmode, DFmode and XFmode */
800 2, /* cost of moving MMX register */
801 {3, 3}, /* cost of loading MMX registers
802 in SImode and DImode */
803 {4, 4}, /* cost of storing MMX registers
804 in SImode and DImode */
805 2, /* cost of moving SSE register */
806 {4, 3, 6}, /* cost of loading SSE registers
807 in SImode, DImode and TImode */
808 {4, 4, 5}, /* cost of storing SSE registers
809 in SImode, DImode and TImode */
810 5, /* MMX or SSE register to integer */
811 64, /* size of l1 cache. */
812 512, /* size of l2 cache. */
813 64, /* size of prefetch block */
814 /* New AMD processors never drop prefetches; if they cannot be performed
815 immediately, they are queued. We set the number of simultaneous prefetches
816 to a large constant to reflect this (it is probably not a good idea to
817 leave the number of prefetches completely unlimited, as their execution
818 also takes some time). */
819 100, /* number of parallel prefetches */
820 3, /* Branch cost */
821 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
822 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
823 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
824 COSTS_N_INSNS (2), /* cost of FABS instruction. */
825 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
826 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
828 k8_memcpy,
829 k8_memset,
830 4, /* scalar_stmt_cost. */
831 2, /* scalar load_cost. */
832 2, /* scalar_store_cost. */
833 5, /* vec_stmt_cost. */
834 0, /* vec_to_scalar_cost. */
835 2, /* scalar_to_vec_cost. */
836 2, /* vec_align_load_cost. */
837 3, /* vec_unalign_load_cost. */
838 3, /* vec_store_cost. */
839 3, /* cond_taken_branch_cost. */
840 2, /* cond_not_taken_branch_cost. */
841 };
843 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
844 very small blocks it is better to use a loop. For large blocks, a libcall can
845 do non-temporal accesses and beat inline code considerably. */
846 static stringop_algs amdfam10_memcpy[2] = {
847 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
848 {-1, rep_prefix_4_byte, false}}},
849 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
850 {-1, libcall, false}}}};
851 static stringop_algs amdfam10_memset[2] = {
852 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
853 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
854 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
855 {-1, libcall, false}}}};
856 struct processor_costs amdfam10_cost = {
857 COSTS_N_INSNS (1), /* cost of an add instruction */
858 COSTS_N_INSNS (2), /* cost of a lea instruction */
859 COSTS_N_INSNS (1), /* variable shift costs */
860 COSTS_N_INSNS (1), /* constant shift costs */
861 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
862 COSTS_N_INSNS (4), /* HI */
863 COSTS_N_INSNS (3), /* SI */
864 COSTS_N_INSNS (4), /* DI */
865 COSTS_N_INSNS (5)}, /* other */
866 0, /* cost of multiply per each bit set */
867 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
868 COSTS_N_INSNS (35), /* HI */
869 COSTS_N_INSNS (51), /* SI */
870 COSTS_N_INSNS (83), /* DI */
871 COSTS_N_INSNS (83)}, /* other */
872 COSTS_N_INSNS (1), /* cost of movsx */
873 COSTS_N_INSNS (1), /* cost of movzx */
874 8, /* "large" insn */
875 9, /* MOVE_RATIO */
876 4, /* cost for loading QImode using movzbl */
877 {3, 4, 3}, /* cost of loading integer registers
878 in QImode, HImode and SImode.
879 Relative to reg-reg move (2). */
880 {3, 4, 3}, /* cost of storing integer registers */
881 4, /* cost of reg,reg fld/fst */
882 {4, 4, 12}, /* cost of loading fp registers
883 in SFmode, DFmode and XFmode */
884 {6, 6, 8}, /* cost of storing fp registers
885 in SFmode, DFmode and XFmode */
886 2, /* cost of moving MMX register */
887 {3, 3}, /* cost of loading MMX registers
888 in SImode and DImode */
889 {4, 4}, /* cost of storing MMX registers
890 in SImode and DImode */
891 2, /* cost of moving SSE register */
892 {4, 4, 3}, /* cost of loading SSE registers
893 in SImode, DImode and TImode */
894 {4, 4, 5}, /* cost of storing SSE registers
895 in SImode, DImode and TImode */
896 3, /* MMX or SSE register to integer */
897 /* On K8:
898 MOVD reg64, xmmreg Double FSTORE 4
899 MOVD reg32, xmmreg Double FSTORE 4
900 On AMDFAM10:
901 MOVD reg64, xmmreg Double FADD 3
902 1/1 1/1
903 MOVD reg32, xmmreg Double FADD 3
904 1/1 1/1 */
905 64, /* size of l1 cache. */
906 512, /* size of l2 cache. */
907 64, /* size of prefetch block */
908 /* New AMD processors never drop prefetches; if they cannot be performed
909 immediately, they are queued. We set the number of simultaneous prefetches
910 to a large constant to reflect this (it is probably not a good idea to
911 leave the number of prefetches completely unlimited, as their execution
912 also takes some time). */
913 100, /* number of parallel prefetches */
914 2, /* Branch cost */
915 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
916 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
917 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
918 COSTS_N_INSNS (2), /* cost of FABS instruction. */
919 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
920 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
922 amdfam10_memcpy,
923 amdfam10_memset,
924 4, /* scalar_stmt_cost. */
925 2, /* scalar load_cost. */
926 2, /* scalar_store_cost. */
927 6, /* vec_stmt_cost. */
928 0, /* vec_to_scalar_cost. */
929 2, /* scalar_to_vec_cost. */
930 2, /* vec_align_load_cost. */
931 2, /* vec_unalign_load_cost. */
932 2, /* vec_store_cost. */
933 2, /* cond_taken_branch_cost. */
934 1, /* cond_not_taken_branch_cost. */
935 };
937 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
938 very small blocks it is better to use a loop. For large blocks, a libcall
939 can do non-temporal accesses and beat inline code considerably. */
940 static stringop_algs bdver1_memcpy[2] = {
941 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
942 {-1, rep_prefix_4_byte, false}}},
943 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
944 {-1, libcall, false}}}};
945 static stringop_algs bdver1_memset[2] = {
946 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
947 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
948 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
949 {-1, libcall, false}}}};
951 const struct processor_costs bdver1_cost = {
952 COSTS_N_INSNS (1), /* cost of an add instruction */
953 COSTS_N_INSNS (1), /* cost of a lea instruction */
954 COSTS_N_INSNS (1), /* variable shift costs */
955 COSTS_N_INSNS (1), /* constant shift costs */
956 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
957 COSTS_N_INSNS (4), /* HI */
958 COSTS_N_INSNS (4), /* SI */
959 COSTS_N_INSNS (6), /* DI */
960 COSTS_N_INSNS (6)}, /* other */
961 0, /* cost of multiply per each bit set */
962 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
963 COSTS_N_INSNS (35), /* HI */
964 COSTS_N_INSNS (51), /* SI */
965 COSTS_N_INSNS (83), /* DI */
966 COSTS_N_INSNS (83)}, /* other */
967 COSTS_N_INSNS (1), /* cost of movsx */
968 COSTS_N_INSNS (1), /* cost of movzx */
969 8, /* "large" insn */
970 9, /* MOVE_RATIO */
971 4, /* cost for loading QImode using movzbl */
972 {5, 5, 4}, /* cost of loading integer registers
973 in QImode, HImode and SImode.
974 Relative to reg-reg move (2). */
975 {4, 4, 4}, /* cost of storing integer registers */
976 2, /* cost of reg,reg fld/fst */
977 {5, 5, 12}, /* cost of loading fp registers
978 in SFmode, DFmode and XFmode */
979 {4, 4, 8}, /* cost of storing fp registers
980 in SFmode, DFmode and XFmode */
981 2, /* cost of moving MMX register */
982 {4, 4}, /* cost of loading MMX registers
983 in SImode and DImode */
984 {4, 4}, /* cost of storing MMX registers
985 in SImode and DImode */
986 2, /* cost of moving SSE register */
987 {4, 4, 4}, /* cost of loading SSE registers
988 in SImode, DImode and TImode */
989 {4, 4, 4}, /* cost of storing SSE registers
990 in SImode, DImode and TImode */
991 2, /* MMX or SSE register to integer */
992 /* On K8:
993 MOVD reg64, xmmreg Double FSTORE 4
994 MOVD reg32, xmmreg Double FSTORE 4
995 On AMDFAM10:
996 MOVD reg64, xmmreg Double FADD 3
997 1/1 1/1
998 MOVD reg32, xmmreg Double FADD 3
999 1/1 1/1 */
1000 16, /* size of l1 cache. */
1001 2048, /* size of l2 cache. */
1002 64, /* size of prefetch block */
1003 /* New AMD processors never drop prefetches; if they cannot be performed
1004 immediately, they are queued. We set the number of simultaneous prefetches
1005 to a large constant to reflect this (it is probably not a good idea to
1006 leave the number of prefetches completely unlimited, as their execution
1007 also takes some time). */
1008 100, /* number of parallel prefetches */
1009 2, /* Branch cost */
1010 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1011 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1012 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1013 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1014 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1015 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1017 bdver1_memcpy,
1018 bdver1_memset,
1019 6, /* scalar_stmt_cost. */
1020 4, /* scalar load_cost. */
1021 4, /* scalar_store_cost. */
1022 6, /* vec_stmt_cost. */
1023 0, /* vec_to_scalar_cost. */
1024 2, /* scalar_to_vec_cost. */
1025 4, /* vec_align_load_cost. */
1026 4, /* vec_unalign_load_cost. */
1027 4, /* vec_store_cost. */
1028 2, /* cond_taken_branch_cost. */
1029 1, /* cond_not_taken_branch_cost. */
1030 };
1032 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1033 very small blocks it is better to use a loop. For large blocks, a libcall
1034 can do non-temporal accesses and beat inline code considerably. */
1036 static stringop_algs bdver2_memcpy[2] = {
1037 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1038 {-1, rep_prefix_4_byte, false}}},
1039 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1040 {-1, libcall, false}}}};
1041 static stringop_algs bdver2_memset[2] = {
1042 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1043 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1044 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1045 {-1, libcall, false}}}};
1047 const struct processor_costs bdver2_cost = {
1048 COSTS_N_INSNS (1), /* cost of an add instruction */
1049 COSTS_N_INSNS (1), /* cost of a lea instruction */
1050 COSTS_N_INSNS (1), /* variable shift costs */
1051 COSTS_N_INSNS (1), /* constant shift costs */
1052 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1053 COSTS_N_INSNS (4), /* HI */
1054 COSTS_N_INSNS (4), /* SI */
1055 COSTS_N_INSNS (6), /* DI */
1056 COSTS_N_INSNS (6)}, /* other */
1057 0, /* cost of multiply per each bit set */
1058 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1059 COSTS_N_INSNS (35), /* HI */
1060 COSTS_N_INSNS (51), /* SI */
1061 COSTS_N_INSNS (83), /* DI */
1062 COSTS_N_INSNS (83)}, /* other */
1063 COSTS_N_INSNS (1), /* cost of movsx */
1064 COSTS_N_INSNS (1), /* cost of movzx */
1065 8, /* "large" insn */
1066 9, /* MOVE_RATIO */
1067 4, /* cost for loading QImode using movzbl */
1068 {5, 5, 4}, /* cost of loading integer registers
1069 in QImode, HImode and SImode.
1070 Relative to reg-reg move (2). */
1071 {4, 4, 4}, /* cost of storing integer registers */
1072 2, /* cost of reg,reg fld/fst */
1073 {5, 5, 12}, /* cost of loading fp registers
1074 in SFmode, DFmode and XFmode */
1075 {4, 4, 8}, /* cost of storing fp registers
1076 in SFmode, DFmode and XFmode */
1077 2, /* cost of moving MMX register */
1078 {4, 4}, /* cost of loading MMX registers
1079 in SImode and DImode */
1080 {4, 4}, /* cost of storing MMX registers
1081 in SImode and DImode */
1082 2, /* cost of moving SSE register */
1083 {4, 4, 4}, /* cost of loading SSE registers
1084 in SImode, DImode and TImode */
1085 {4, 4, 4}, /* cost of storing SSE registers
1086 in SImode, DImode and TImode */
1087 2, /* MMX or SSE register to integer */
1088 /* On K8:
1089 MOVD reg64, xmmreg Double FSTORE 4
1090 MOVD reg32, xmmreg Double FSTORE 4
1091 On AMDFAM10:
1092 MOVD reg64, xmmreg Double FADD 3
1093 1/1 1/1
1094 MOVD reg32, xmmreg Double FADD 3
1095 1/1 1/1 */
1096 16, /* size of l1 cache. */
1097 2048, /* size of l2 cache. */
1098 64, /* size of prefetch block */
1099 /* New AMD processors never drop prefetches; if they cannot be performed
1100 immediately, they are queued. We set the number of simultaneous prefetches
1101 to a large constant to reflect this (it is probably not a good idea to
1102 leave the number of prefetches completely unlimited, as their execution
1103 also takes some time). */
1104 100, /* number of parallel prefetches */
1105 2, /* Branch cost */
1106 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1107 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1108 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1109 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1110 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1111 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1113 bdver2_memcpy,
1114 bdver2_memset,
1115 6, /* scalar_stmt_cost. */
1116 4, /* scalar load_cost. */
1117 4, /* scalar_store_cost. */
1118 6, /* vec_stmt_cost. */
1119 0, /* vec_to_scalar_cost. */
1120 2, /* scalar_to_vec_cost. */
1121 4, /* vec_align_load_cost. */
1122 4, /* vec_unalign_load_cost. */
1123 4, /* vec_store_cost. */
1124 2, /* cond_taken_branch_cost. */
1125 1, /* cond_not_taken_branch_cost. */
1126 };
1129 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1130 very small blocks it is better to use a loop. For large blocks, a libcall
1131 can do non-temporal accesses and beat inline code considerably. */
1132 static stringop_algs bdver3_memcpy[2] = {
1133 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1134 {-1, rep_prefix_4_byte, false}}},
1135 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1136 {-1, libcall, false}}}};
1137 static stringop_algs bdver3_memset[2] = {
1138 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1139 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1140 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1141 {-1, libcall, false}}}};
1142 struct processor_costs bdver3_cost = {
1143 COSTS_N_INSNS (1), /* cost of an add instruction */
1144 COSTS_N_INSNS (1), /* cost of a lea instruction */
1145 COSTS_N_INSNS (1), /* variable shift costs */
1146 COSTS_N_INSNS (1), /* constant shift costs */
1147 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1148 COSTS_N_INSNS (4), /* HI */
1149 COSTS_N_INSNS (4), /* SI */
1150 COSTS_N_INSNS (6), /* DI */
1151 COSTS_N_INSNS (6)}, /* other */
1152 0, /* cost of multiply per each bit set */
1153 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1154 COSTS_N_INSNS (35), /* HI */
1155 COSTS_N_INSNS (51), /* SI */
1156 COSTS_N_INSNS (83), /* DI */
1157 COSTS_N_INSNS (83)}, /* other */
1158 COSTS_N_INSNS (1), /* cost of movsx */
1159 COSTS_N_INSNS (1), /* cost of movzx */
1160 8, /* "large" insn */
1161 9, /* MOVE_RATIO */
1162 4, /* cost for loading QImode using movzbl */
1163 {5, 5, 4}, /* cost of loading integer registers
1164 in QImode, HImode and SImode.
1165 Relative to reg-reg move (2). */
1166 {4, 4, 4}, /* cost of storing integer registers */
1167 2, /* cost of reg,reg fld/fst */
1168 {5, 5, 12}, /* cost of loading fp registers
1169 in SFmode, DFmode and XFmode */
1170 {4, 4, 8}, /* cost of storing fp registers
1171 in SFmode, DFmode and XFmode */
1172 2, /* cost of moving MMX register */
1173 {4, 4}, /* cost of loading MMX registers
1174 in SImode and DImode */
1175 {4, 4}, /* cost of storing MMX registers
1176 in SImode and DImode */
1177 2, /* cost of moving SSE register */
1178 {4, 4, 4}, /* cost of loading SSE registers
1179 in SImode, DImode and TImode */
1180 {4, 4, 4}, /* cost of storing SSE registers
1181 in SImode, DImode and TImode */
1182 2, /* MMX or SSE register to integer */
1183 16, /* size of l1 cache. */
1184 2048, /* size of l2 cache. */
1185 64, /* size of prefetch block */
1186 /* New AMD processors never drop prefetches; if they cannot be performed
1187 immediately, they are queued. We set the number of simultaneous prefetches
1188 to a large constant to reflect this (it is probably not a good idea to
1189 leave the number of prefetches completely unlimited, as their execution
1190 also takes some time). */
1191 100, /* number of parallel prefetches */
1192 2, /* Branch cost */
1193 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1194 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1195 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1196 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1197 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1198 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1200 bdver3_memcpy,
1201 bdver3_memset,
1202 6, /* scalar_stmt_cost. */
1203 4, /* scalar load_cost. */
1204 4, /* scalar_store_cost. */
1205 6, /* vec_stmt_cost. */
1206 0, /* vec_to_scalar_cost. */
1207 2, /* scalar_to_vec_cost. */
1208 4, /* vec_align_load_cost. */
1209 4, /* vec_unalign_load_cost. */
1210 4, /* vec_store_cost. */
1211 2, /* cond_taken_branch_cost. */
1212 1, /* cond_not_taken_branch_cost. */
1213 };
1215 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1216 very small blocks it is better to use a loop. For large blocks, a libcall
1217 can do non-temporal accesses and beat inline code considerably. */
1218 static stringop_algs bdver4_memcpy[2] = {
1219 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1220 {-1, rep_prefix_4_byte, false}}},
1221 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1222 {-1, libcall, false}}}};
1223 static stringop_algs bdver4_memset[2] = {
1224 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1225 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1226 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1227 {-1, libcall, false}}}};
1228 struct processor_costs bdver4_cost = {
1229 COSTS_N_INSNS (1), /* cost of an add instruction */
1230 COSTS_N_INSNS (1), /* cost of a lea instruction */
1231 COSTS_N_INSNS (1), /* variable shift costs */
1232 COSTS_N_INSNS (1), /* constant shift costs */
1233 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1234 COSTS_N_INSNS (4), /* HI */
1235 COSTS_N_INSNS (4), /* SI */
1236 COSTS_N_INSNS (6), /* DI */
1237 COSTS_N_INSNS (6)}, /* other */
1238 0, /* cost of multiply per each bit set */
1239 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1240 COSTS_N_INSNS (35), /* HI */
1241 COSTS_N_INSNS (51), /* SI */
1242 COSTS_N_INSNS (83), /* DI */
1243 COSTS_N_INSNS (83)}, /* other */
1244 COSTS_N_INSNS (1), /* cost of movsx */
1245 COSTS_N_INSNS (1), /* cost of movzx */
1246 8, /* "large" insn */
1247 9, /* MOVE_RATIO */
1248 4, /* cost for loading QImode using movzbl */
1249 {5, 5, 4}, /* cost of loading integer registers
1250 in QImode, HImode and SImode.
1251 Relative to reg-reg move (2). */
1252 {4, 4, 4}, /* cost of storing integer registers */
1253 2, /* cost of reg,reg fld/fst */
1254 {5, 5, 12}, /* cost of loading fp registers
1255 in SFmode, DFmode and XFmode */
1256 {4, 4, 8}, /* cost of storing fp registers
1257 in SFmode, DFmode and XFmode */
1258 2, /* cost of moving MMX register */
1259 {4, 4}, /* cost of loading MMX registers
1260 in SImode and DImode */
1261 {4, 4}, /* cost of storing MMX registers
1262 in SImode and DImode */
1263 2, /* cost of moving SSE register */
1264 {4, 4, 4}, /* cost of loading SSE registers
1265 in SImode, DImode and TImode */
1266 {4, 4, 4}, /* cost of storing SSE registers
1267 in SImode, DImode and TImode */
1268 2, /* MMX or SSE register to integer */
1269 16, /* size of l1 cache. */
1270 2048, /* size of l2 cache. */
1271 64, /* size of prefetch block */
1272 /* New AMD processors never drop prefetches; if they cannot be performed
1273 immediately, they are queued. We set the number of simultaneous prefetches
1274 to a large constant to reflect this (it is probably not a good idea to
1275 leave the number of prefetches completely unlimited, as their execution
1276 also takes some time). */
1277 100, /* number of parallel prefetches */
1278 2, /* Branch cost */
1279 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1280 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1281 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1282 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1283 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1284 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1286 bdver4_memcpy,
1287 bdver4_memset,
1288 6, /* scalar_stmt_cost. */
1289 4, /* scalar load_cost. */
1290 4, /* scalar_store_cost. */
1291 6, /* vec_stmt_cost. */
1292 0, /* vec_to_scalar_cost. */
1293 2, /* scalar_to_vec_cost. */
1294 4, /* vec_align_load_cost. */
1295 4, /* vec_unalign_load_cost. */
1296 4, /* vec_store_cost. */
1297 2, /* cond_taken_branch_cost. */
1298 1, /* cond_not_taken_branch_cost. */
1299 };
1301 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1302 very small blocks it is better to use a loop. For large blocks, a libcall can
1303 do non-temporal accesses and beat inline code considerably. */
1304 static stringop_algs btver1_memcpy[2] = {
1305 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1306 {-1, rep_prefix_4_byte, false}}},
1307 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1308 {-1, libcall, false}}}};
1309 static stringop_algs btver1_memset[2] = {
1310 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1311 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1312 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1313 {-1, libcall, false}}}};
1314 const struct processor_costs btver1_cost = {
1315 COSTS_N_INSNS (1), /* cost of an add instruction */
1316 COSTS_N_INSNS (2), /* cost of a lea instruction */
1317 COSTS_N_INSNS (1), /* variable shift costs */
1318 COSTS_N_INSNS (1), /* constant shift costs */
1319 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1320 COSTS_N_INSNS (4), /* HI */
1321 COSTS_N_INSNS (3), /* SI */
1322 COSTS_N_INSNS (4), /* DI */
1323 COSTS_N_INSNS (5)}, /* other */
1324 0, /* cost of multiply per each bit set */
1325 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1326 COSTS_N_INSNS (35), /* HI */
1327 COSTS_N_INSNS (51), /* SI */
1328 COSTS_N_INSNS (83), /* DI */
1329 COSTS_N_INSNS (83)}, /* other */
1330 COSTS_N_INSNS (1), /* cost of movsx */
1331 COSTS_N_INSNS (1), /* cost of movzx */
1332 8, /* "large" insn */
1333 9, /* MOVE_RATIO */
1334 4, /* cost for loading QImode using movzbl */
1335 {3, 4, 3}, /* cost of loading integer registers
1336 in QImode, HImode and SImode.
1337 Relative to reg-reg move (2). */
1338 {3, 4, 3}, /* cost of storing integer registers */
1339 4, /* cost of reg,reg fld/fst */
1340 {4, 4, 12}, /* cost of loading fp registers
1341 in SFmode, DFmode and XFmode */
1342 {6, 6, 8}, /* cost of storing fp registers
1343 in SFmode, DFmode and XFmode */
1344 2, /* cost of moving MMX register */
1345 {3, 3}, /* cost of loading MMX registers
1346 in SImode and DImode */
1347 {4, 4}, /* cost of storing MMX registers
1348 in SImode and DImode */
1349 2, /* cost of moving SSE register */
1350 {4, 4, 3}, /* cost of loading SSE registers
1351 in SImode, DImode and TImode */
1352 {4, 4, 5}, /* cost of storing SSE registers
1353 in SImode, DImode and TImode */
1354 3, /* MMX or SSE register to integer */
1355 /* On K8:
1356 MOVD reg64, xmmreg Double FSTORE 4
1357 MOVD reg32, xmmreg Double FSTORE 4
1358 On AMDFAM10:
1359 MOVD reg64, xmmreg Double FADD 3
1360 1/1 1/1
1361 MOVD reg32, xmmreg Double FADD 3
1362 1/1 1/1 */
1363 32, /* size of l1 cache. */
1364 512, /* size of l2 cache. */
1365 64, /* size of prefetch block */
1366 100, /* number of parallel prefetches */
1367 2, /* Branch cost */
1368 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1369 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1370 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1371 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1372 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1373 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1375 btver1_memcpy,
1376 btver1_memset,
1377 4, /* scalar_stmt_cost. */
1378 2, /* scalar load_cost. */
1379 2, /* scalar_store_cost. */
1380 6, /* vec_stmt_cost. */
1381 0, /* vec_to_scalar_cost. */
1382 2, /* scalar_to_vec_cost. */
1383 2, /* vec_align_load_cost. */
1384 2, /* vec_unalign_load_cost. */
1385 2, /* vec_store_cost. */
1386 2, /* cond_taken_branch_cost. */
1387 1, /* cond_not_taken_branch_cost. */
1388 };
1390 static stringop_algs btver2_memcpy[2] = {
1391 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1392 {-1, rep_prefix_4_byte, false}}},
1393 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1394 {-1, libcall, false}}}};
1395 static stringop_algs btver2_memset[2] = {
1396 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1397 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1398 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1399 {-1, libcall, false}}}};
1400 const struct processor_costs btver2_cost = {
1401 COSTS_N_INSNS (1), /* cost of an add instruction */
1402 COSTS_N_INSNS (2), /* cost of a lea instruction */
1403 COSTS_N_INSNS (1), /* variable shift costs */
1404 COSTS_N_INSNS (1), /* constant shift costs */
1405 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1406 COSTS_N_INSNS (4), /* HI */
1407 COSTS_N_INSNS (3), /* SI */
1408 COSTS_N_INSNS (4), /* DI */
1409 COSTS_N_INSNS (5)}, /* other */
1410 0, /* cost of multiply per each bit set */
1411 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1412 COSTS_N_INSNS (35), /* HI */
1413 COSTS_N_INSNS (51), /* SI */
1414 COSTS_N_INSNS (83), /* DI */
1415 COSTS_N_INSNS (83)}, /* other */
1416 COSTS_N_INSNS (1), /* cost of movsx */
1417 COSTS_N_INSNS (1), /* cost of movzx */
1418 8, /* "large" insn */
1419 9, /* MOVE_RATIO */
1420 4, /* cost for loading QImode using movzbl */
1421 {3, 4, 3}, /* cost of loading integer registers
1422 in QImode, HImode and SImode.
1423 Relative to reg-reg move (2). */
1424 {3, 4, 3}, /* cost of storing integer registers */
1425 4, /* cost of reg,reg fld/fst */
1426 {4, 4, 12}, /* cost of loading fp registers
1427 in SFmode, DFmode and XFmode */
1428 {6, 6, 8}, /* cost of storing fp registers
1429 in SFmode, DFmode and XFmode */
1430 2, /* cost of moving MMX register */
1431 {3, 3}, /* cost of loading MMX registers
1432 in SImode and DImode */
1433 {4, 4}, /* cost of storing MMX registers
1434 in SImode and DImode */
1435 2, /* cost of moving SSE register */
1436 {4, 4, 3}, /* cost of loading SSE registers
1437 in SImode, DImode and TImode */
1438 {4, 4, 5}, /* cost of storing SSE registers
1439 in SImode, DImode and TImode */
1440 3, /* MMX or SSE register to integer */
1441 /* On K8:
1442 MOVD reg64, xmmreg Double FSTORE 4
1443 MOVD reg32, xmmreg Double FSTORE 4
1444 On AMDFAM10:
1445 MOVD reg64, xmmreg Double FADD 3
1446 1/1 1/1
1447 MOVD reg32, xmmreg Double FADD 3
1448 1/1 1/1 */
1449 32, /* size of l1 cache. */
1450 2048, /* size of l2 cache. */
1451 64, /* size of prefetch block */
1452 100, /* number of parallel prefetches */
1453 2, /* Branch cost */
1454 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1455 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1456 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1457 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1458 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1459 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1460 btver2_memcpy,
1461 btver2_memset,
1462 4, /* scalar_stmt_cost. */
1463 2, /* scalar load_cost. */
1464 2, /* scalar_store_cost. */
1465 6, /* vec_stmt_cost. */
1466 0, /* vec_to_scalar_cost. */
1467 2, /* scalar_to_vec_cost. */
1468 2, /* vec_align_load_cost. */
1469 2, /* vec_unalign_load_cost. */
1470 2, /* vec_store_cost. */
1471 2, /* cond_taken_branch_cost. */
1472 1, /* cond_not_taken_branch_cost. */
1475 static stringop_algs pentium4_memcpy[2] = {
1476 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1477 DUMMY_STRINGOP_ALGS};
1478 static stringop_algs pentium4_memset[2] = {
1479 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1480 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1481 DUMMY_STRINGOP_ALGS};
1483 static const
1484 struct processor_costs pentium4_cost = {
1485 COSTS_N_INSNS (1), /* cost of an add instruction */
1486 COSTS_N_INSNS (3), /* cost of a lea instruction */
1487 COSTS_N_INSNS (4), /* variable shift costs */
1488 COSTS_N_INSNS (4), /* constant shift costs */
1489 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1490 COSTS_N_INSNS (15), /* HI */
1491 COSTS_N_INSNS (15), /* SI */
1492 COSTS_N_INSNS (15), /* DI */
1493 COSTS_N_INSNS (15)}, /* other */
1494 0, /* cost of multiply per each bit set */
1495 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1496 COSTS_N_INSNS (56), /* HI */
1497 COSTS_N_INSNS (56), /* SI */
1498 COSTS_N_INSNS (56), /* DI */
1499 COSTS_N_INSNS (56)}, /* other */
1500 COSTS_N_INSNS (1), /* cost of movsx */
1501 COSTS_N_INSNS (1), /* cost of movzx */
1502 16, /* "large" insn */
1503 6, /* MOVE_RATIO */
1504 2, /* cost for loading QImode using movzbl */
1505 {4, 5, 4}, /* cost of loading integer registers
1506 in QImode, HImode and SImode.
1507 Relative to reg-reg move (2). */
1508 {2, 3, 2}, /* cost of storing integer registers */
1509 2, /* cost of reg,reg fld/fst */
1510 {2, 2, 6}, /* cost of loading fp registers
1511 in SFmode, DFmode and XFmode */
1512 {4, 4, 6}, /* cost of storing fp registers
1513 in SFmode, DFmode and XFmode */
1514 2, /* cost of moving MMX register */
1515 {2, 2}, /* cost of loading MMX registers
1516 in SImode and DImode */
1517 {2, 2}, /* cost of storing MMX registers
1518 in SImode and DImode */
1519 12, /* cost of moving SSE register */
1520 {12, 12, 12}, /* cost of loading SSE registers
1521 in SImode, DImode and TImode */
1522 {2, 2, 8}, /* cost of storing SSE registers
1523 in SImode, DImode and TImode */
1524 10, /* MMX or SSE register to integer */
1525 8, /* size of l1 cache. */
1526 256, /* size of l2 cache. */
1527 64, /* size of prefetch block */
1528 6, /* number of parallel prefetches */
1529 2, /* Branch cost */
1530 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1531 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1532 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1533 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1534 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1535 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1536 pentium4_memcpy,
1537 pentium4_memset,
1538 1, /* scalar_stmt_cost. */
1539 1, /* scalar load_cost. */
1540 1, /* scalar_store_cost. */
1541 1, /* vec_stmt_cost. */
1542 1, /* vec_to_scalar_cost. */
1543 1, /* scalar_to_vec_cost. */
1544 1, /* vec_align_load_cost. */
1545 2, /* vec_unalign_load_cost. */
1546 1, /* vec_store_cost. */
1547 3, /* cond_taken_branch_cost. */
1548 1, /* cond_not_taken_branch_cost. */
1551 static stringop_algs nocona_memcpy[2] = {
1552 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1553 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1554 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1556 static stringop_algs nocona_memset[2] = {
1557 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1558 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1559 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1560 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1562 static const
1563 struct processor_costs nocona_cost = {
1564 COSTS_N_INSNS (1), /* cost of an add instruction */
1565 COSTS_N_INSNS (1), /* cost of a lea instruction */
1566 COSTS_N_INSNS (1), /* variable shift costs */
1567 COSTS_N_INSNS (1), /* constant shift costs */
1568 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1569 COSTS_N_INSNS (10), /* HI */
1570 COSTS_N_INSNS (10), /* SI */
1571 COSTS_N_INSNS (10), /* DI */
1572 COSTS_N_INSNS (10)}, /* other */
1573 0, /* cost of multiply per each bit set */
1574 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1575 COSTS_N_INSNS (66), /* HI */
1576 COSTS_N_INSNS (66), /* SI */
1577 COSTS_N_INSNS (66), /* DI */
1578 COSTS_N_INSNS (66)}, /* other */
1579 COSTS_N_INSNS (1), /* cost of movsx */
1580 COSTS_N_INSNS (1), /* cost of movzx */
1581 16, /* "large" insn */
1582 17, /* MOVE_RATIO */
1583 4, /* cost for loading QImode using movzbl */
1584 {4, 4, 4}, /* cost of loading integer registers
1585 in QImode, HImode and SImode.
1586 Relative to reg-reg move (2). */
1587 {4, 4, 4}, /* cost of storing integer registers */
1588 3, /* cost of reg,reg fld/fst */
1589 {12, 12, 12}, /* cost of loading fp registers
1590 in SFmode, DFmode and XFmode */
1591 {4, 4, 4}, /* cost of storing fp registers
1592 in SFmode, DFmode and XFmode */
1593 6, /* cost of moving MMX register */
1594 {12, 12}, /* cost of loading MMX registers
1595 in SImode and DImode */
1596 {12, 12}, /* cost of storing MMX registers
1597 in SImode and DImode */
1598 6, /* cost of moving SSE register */
1599 {12, 12, 12}, /* cost of loading SSE registers
1600 in SImode, DImode and TImode */
1601 {12, 12, 12}, /* cost of storing SSE registers
1602 in SImode, DImode and TImode */
1603 8, /* MMX or SSE register to integer */
1604 8, /* size of l1 cache. */
1605 1024, /* size of l2 cache. */
1606 64, /* size of prefetch block */
1607 8, /* number of parallel prefetches */
1608 1, /* Branch cost */
1609 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1610 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1611 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1612 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1613 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1614 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1615 nocona_memcpy,
1616 nocona_memset,
1617 1, /* scalar_stmt_cost. */
1618 1, /* scalar load_cost. */
1619 1, /* scalar_store_cost. */
1620 1, /* vec_stmt_cost. */
1621 1, /* vec_to_scalar_cost. */
1622 1, /* scalar_to_vec_cost. */
1623 1, /* vec_align_load_cost. */
1624 2, /* vec_unalign_load_cost. */
1625 1, /* vec_store_cost. */
1626 3, /* cond_taken_branch_cost. */
1627 1, /* cond_not_taken_branch_cost. */
1630 static stringop_algs atom_memcpy[2] = {
1631 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1632 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1633 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1634 static stringop_algs atom_memset[2] = {
1635 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1636 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1637 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1638 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1639 static const
1640 struct processor_costs atom_cost = {
1641 COSTS_N_INSNS (1), /* cost of an add instruction */
1642 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1643 COSTS_N_INSNS (1), /* variable shift costs */
1644 COSTS_N_INSNS (1), /* constant shift costs */
1645 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1646 COSTS_N_INSNS (4), /* HI */
1647 COSTS_N_INSNS (3), /* SI */
1648 COSTS_N_INSNS (4), /* DI */
1649 COSTS_N_INSNS (2)}, /* other */
1650 0, /* cost of multiply per each bit set */
1651 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1652 COSTS_N_INSNS (26), /* HI */
1653 COSTS_N_INSNS (42), /* SI */
1654 COSTS_N_INSNS (74), /* DI */
1655 COSTS_N_INSNS (74)}, /* other */
1656 COSTS_N_INSNS (1), /* cost of movsx */
1657 COSTS_N_INSNS (1), /* cost of movzx */
1658 8, /* "large" insn */
1659 17, /* MOVE_RATIO */
1660 4, /* cost for loading QImode using movzbl */
1661 {4, 4, 4}, /* cost of loading integer registers
1662 in QImode, HImode and SImode.
1663 Relative to reg-reg move (2). */
1664 {4, 4, 4}, /* cost of storing integer registers */
1665 4, /* cost of reg,reg fld/fst */
1666 {12, 12, 12}, /* cost of loading fp registers
1667 in SFmode, DFmode and XFmode */
1668 {6, 6, 8}, /* cost of storing fp registers
1669 in SFmode, DFmode and XFmode */
1670 2, /* cost of moving MMX register */
1671 {8, 8}, /* cost of loading MMX registers
1672 in SImode and DImode */
1673 {8, 8}, /* cost of storing MMX registers
1674 in SImode and DImode */
1675 2, /* cost of moving SSE register */
1676 {8, 8, 8}, /* cost of loading SSE registers
1677 in SImode, DImode and TImode */
1678 {8, 8, 8}, /* cost of storing SSE registers
1679 in SImode, DImode and TImode */
1680 5, /* MMX or SSE register to integer */
1681 32, /* size of l1 cache. */
1682 256, /* size of l2 cache. */
1683 64, /* size of prefetch block */
1684 6, /* number of parallel prefetches */
1685 3, /* Branch cost */
1686 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1687 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1688 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1689 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1690 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1691 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1692 atom_memcpy,
1693 atom_memset,
1694 1, /* scalar_stmt_cost. */
1695 1, /* scalar load_cost. */
1696 1, /* scalar_store_cost. */
1697 1, /* vec_stmt_cost. */
1698 1, /* vec_to_scalar_cost. */
1699 1, /* scalar_to_vec_cost. */
1700 1, /* vec_align_load_cost. */
1701 2, /* vec_unalign_load_cost. */
1702 1, /* vec_store_cost. */
1703 3, /* cond_taken_branch_cost. */
1704 1, /* cond_not_taken_branch_cost. */
1707 static stringop_algs slm_memcpy[2] = {
1708 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1709 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1710 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1711 static stringop_algs slm_memset[2] = {
1712 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1713 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1714 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1715 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1716 static const
1717 struct processor_costs slm_cost = {
1718 COSTS_N_INSNS (1), /* cost of an add instruction */
1719 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1720 COSTS_N_INSNS (1), /* variable shift costs */
1721 COSTS_N_INSNS (1), /* constant shift costs */
1722 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1723 COSTS_N_INSNS (3), /* HI */
1724 COSTS_N_INSNS (3), /* SI */
1725 COSTS_N_INSNS (4), /* DI */
1726 COSTS_N_INSNS (2)}, /* other */
1727 0, /* cost of multiply per each bit set */
1728 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1729 COSTS_N_INSNS (26), /* HI */
1730 COSTS_N_INSNS (42), /* SI */
1731 COSTS_N_INSNS (74), /* DI */
1732 COSTS_N_INSNS (74)}, /* other */
1733 COSTS_N_INSNS (1), /* cost of movsx */
1734 COSTS_N_INSNS (1), /* cost of movzx */
1735 8, /* "large" insn */
1736 17, /* MOVE_RATIO */
1737 4, /* cost for loading QImode using movzbl */
1738 {4, 4, 4}, /* cost of loading integer registers
1739 in QImode, HImode and SImode.
1740 Relative to reg-reg move (2). */
1741 {4, 4, 4}, /* cost of storing integer registers */
1742 4, /* cost of reg,reg fld/fst */
1743 {12, 12, 12}, /* cost of loading fp registers
1744 in SFmode, DFmode and XFmode */
1745 {6, 6, 8}, /* cost of storing fp registers
1746 in SFmode, DFmode and XFmode */
1747 2, /* cost of moving MMX register */
1748 {8, 8}, /* cost of loading MMX registers
1749 in SImode and DImode */
1750 {8, 8}, /* cost of storing MMX registers
1751 in SImode and DImode */
1752 2, /* cost of moving SSE register */
1753 {8, 8, 8}, /* cost of loading SSE registers
1754 in SImode, DImode and TImode */
1755 {8, 8, 8}, /* cost of storing SSE registers
1756 in SImode, DImode and TImode */
1757 5, /* MMX or SSE register to integer */
1758 32, /* size of l1 cache. */
1759 256, /* size of l2 cache. */
1760 64, /* size of prefetch block */
1761 6, /* number of parallel prefetches */
1762 3, /* Branch cost */
1763 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1764 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1765 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1766 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1767 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1768 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1769 slm_memcpy,
1770 slm_memset,
1771 1, /* scalar_stmt_cost. */
1772 1, /* scalar load_cost. */
1773 1, /* scalar_store_cost. */
1774 1, /* vec_stmt_cost. */
1775 4, /* vec_to_scalar_cost. */
1776 1, /* scalar_to_vec_cost. */
1777 1, /* vec_align_load_cost. */
1778 2, /* vec_unalign_load_cost. */
1779 1, /* vec_store_cost. */
1780 3, /* cond_taken_branch_cost. */
1781 1, /* cond_not_taken_branch_cost. */
1784 static stringop_algs intel_memcpy[2] = {
1785 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1786 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1787 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1788 static stringop_algs intel_memset[2] = {
1789 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1790 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1791 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1792 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1793 static const
1794 struct processor_costs intel_cost = {
1795 COSTS_N_INSNS (1), /* cost of an add instruction */
1796 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1797 COSTS_N_INSNS (1), /* variable shift costs */
1798 COSTS_N_INSNS (1), /* constant shift costs */
1799 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1800 COSTS_N_INSNS (3), /* HI */
1801 COSTS_N_INSNS (3), /* SI */
1802 COSTS_N_INSNS (4), /* DI */
1803 COSTS_N_INSNS (2)}, /* other */
1804 0, /* cost of multiply per each bit set */
1805 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1806 COSTS_N_INSNS (26), /* HI */
1807 COSTS_N_INSNS (42), /* SI */
1808 COSTS_N_INSNS (74), /* DI */
1809 COSTS_N_INSNS (74)}, /* other */
1810 COSTS_N_INSNS (1), /* cost of movsx */
1811 COSTS_N_INSNS (1), /* cost of movzx */
1812 8, /* "large" insn */
1813 17, /* MOVE_RATIO */
1814 4, /* cost for loading QImode using movzbl */
1815 {4, 4, 4}, /* cost of loading integer registers
1816 in QImode, HImode and SImode.
1817 Relative to reg-reg move (2). */
1818 {4, 4, 4}, /* cost of storing integer registers */
1819 4, /* cost of reg,reg fld/fst */
1820 {12, 12, 12}, /* cost of loading fp registers
1821 in SFmode, DFmode and XFmode */
1822 {6, 6, 8}, /* cost of storing fp registers
1823 in SFmode, DFmode and XFmode */
1824 2, /* cost of moving MMX register */
1825 {8, 8}, /* cost of loading MMX registers
1826 in SImode and DImode */
1827 {8, 8}, /* cost of storing MMX registers
1828 in SImode and DImode */
1829 2, /* cost of moving SSE register */
1830 {8, 8, 8}, /* cost of loading SSE registers
1831 in SImode, DImode and TImode */
1832 {8, 8, 8}, /* cost of storing SSE registers
1833 in SImode, DImode and TImode */
1834 5, /* MMX or SSE register to integer */
1835 32, /* size of l1 cache. */
1836 256, /* size of l2 cache. */
1837 64, /* size of prefetch block */
1838 6, /* number of parallel prefetches */
1839 3, /* Branch cost */
1840 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1841 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1842 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1843 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1844 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1845 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1846 intel_memcpy,
1847 intel_memset,
1848 1, /* scalar_stmt_cost. */
1849 1, /* scalar load_cost. */
1850 1, /* scalar_store_cost. */
1851 1, /* vec_stmt_cost. */
1852 4, /* vec_to_scalar_cost. */
1853 1, /* scalar_to_vec_cost. */
1854 1, /* vec_align_load_cost. */
1855 2, /* vec_unalign_load_cost. */
1856 1, /* vec_store_cost. */
1857 3, /* cond_taken_branch_cost. */
1858 1, /* cond_not_taken_branch_cost. */
1861 /* Generic should produce code tuned for Core-i7 (and newer chips)
1862 and btver1 (and newer chips). */
1864 static stringop_algs generic_memcpy[2] = {
1865 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1866 {-1, libcall, false}}},
1867 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1868 {-1, libcall, false}}}};
1869 static stringop_algs generic_memset[2] = {
1870 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1871 {-1, libcall, false}}},
1872 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1873 {-1, libcall, false}}}};
1874 static const
1875 struct processor_costs generic_cost = {
1876 COSTS_N_INSNS (1), /* cost of an add instruction */
1877    /* On all chips taken into consideration, lea is 2 cycles or more.  With
1878       this cost, however, our current implementation of synth_mult results in
1879       the use of unnecessary temporary registers, causing regressions on several
1880       SPECfp benchmarks. */
1881 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1882 COSTS_N_INSNS (1), /* variable shift costs */
1883 COSTS_N_INSNS (1), /* constant shift costs */
1884 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1885 COSTS_N_INSNS (4), /* HI */
1886 COSTS_N_INSNS (3), /* SI */
1887 COSTS_N_INSNS (4), /* DI */
1888 COSTS_N_INSNS (2)}, /* other */
1889 0, /* cost of multiply per each bit set */
1890 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1891 COSTS_N_INSNS (26), /* HI */
1892 COSTS_N_INSNS (42), /* SI */
1893 COSTS_N_INSNS (74), /* DI */
1894 COSTS_N_INSNS (74)}, /* other */
1895 COSTS_N_INSNS (1), /* cost of movsx */
1896 COSTS_N_INSNS (1), /* cost of movzx */
1897 8, /* "large" insn */
1898 17, /* MOVE_RATIO */
1899 4, /* cost for loading QImode using movzbl */
1900 {4, 4, 4}, /* cost of loading integer registers
1901 in QImode, HImode and SImode.
1902 Relative to reg-reg move (2). */
1903 {4, 4, 4}, /* cost of storing integer registers */
1904 4, /* cost of reg,reg fld/fst */
1905 {12, 12, 12}, /* cost of loading fp registers
1906 in SFmode, DFmode and XFmode */
1907 {6, 6, 8}, /* cost of storing fp registers
1908 in SFmode, DFmode and XFmode */
1909 2, /* cost of moving MMX register */
1910 {8, 8}, /* cost of loading MMX registers
1911 in SImode and DImode */
1912 {8, 8}, /* cost of storing MMX registers
1913 in SImode and DImode */
1914 2, /* cost of moving SSE register */
1915 {8, 8, 8}, /* cost of loading SSE registers
1916 in SImode, DImode and TImode */
1917 {8, 8, 8}, /* cost of storing SSE registers
1918 in SImode, DImode and TImode */
1919 5, /* MMX or SSE register to integer */
1920 32, /* size of l1 cache. */
1921 512, /* size of l2 cache. */
1922 64, /* size of prefetch block */
1923 6, /* number of parallel prefetches */
1924    /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1925       value is increased to the perhaps more appropriate value of 5. */
1926 3, /* Branch cost */
1927 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1928 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1929 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1930 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1931 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1932 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1933 generic_memcpy,
1934 generic_memset,
1935 1, /* scalar_stmt_cost. */
1936 1, /* scalar load_cost. */
1937 1, /* scalar_store_cost. */
1938 1, /* vec_stmt_cost. */
1939 1, /* vec_to_scalar_cost. */
1940 1, /* scalar_to_vec_cost. */
1941 1, /* vec_align_load_cost. */
1942 2, /* vec_unalign_load_cost. */
1943 1, /* vec_store_cost. */
1944 3, /* cond_taken_branch_cost. */
1945 1, /* cond_not_taken_branch_cost. */
1948 /* core_cost should produce code tuned for the Core family of CPUs. */
1949 static stringop_algs core_memcpy[2] = {
1950 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1951 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1952 {-1, libcall, false}}}};
1953 static stringop_algs core_memset[2] = {
1954 {libcall, {{6, loop_1_byte, true},
1955 {24, loop, true},
1956 {8192, rep_prefix_4_byte, true},
1957 {-1, libcall, false}}},
1958 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1959 {-1, libcall, false}}}};
1961 static const
1962 struct processor_costs core_cost = {
1963 COSTS_N_INSNS (1), /* cost of an add instruction */
1964    /* On all chips taken into consideration, lea is 2 cycles or more.  With
1965       this cost, however, our current implementation of synth_mult results in
1966       the use of unnecessary temporary registers, causing regressions on several
1967       SPECfp benchmarks. */
1968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1969 COSTS_N_INSNS (1), /* variable shift costs */
1970 COSTS_N_INSNS (1), /* constant shift costs */
1971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1972 COSTS_N_INSNS (4), /* HI */
1973 COSTS_N_INSNS (3), /* SI */
1974 COSTS_N_INSNS (4), /* DI */
1975 COSTS_N_INSNS (2)}, /* other */
1976 0, /* cost of multiply per each bit set */
1977 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1978 COSTS_N_INSNS (26), /* HI */
1979 COSTS_N_INSNS (42), /* SI */
1980 COSTS_N_INSNS (74), /* DI */
1981 COSTS_N_INSNS (74)}, /* other */
1982 COSTS_N_INSNS (1), /* cost of movsx */
1983 COSTS_N_INSNS (1), /* cost of movzx */
1984 8, /* "large" insn */
1985 17, /* MOVE_RATIO */
1986 4, /* cost for loading QImode using movzbl */
1987 {4, 4, 4}, /* cost of loading integer registers
1988 in QImode, HImode and SImode.
1989 Relative to reg-reg move (2). */
1990 {4, 4, 4}, /* cost of storing integer registers */
1991 4, /* cost of reg,reg fld/fst */
1992 {12, 12, 12}, /* cost of loading fp registers
1993 in SFmode, DFmode and XFmode */
1994 {6, 6, 8}, /* cost of storing fp registers
1995 in SFmode, DFmode and XFmode */
1996 2, /* cost of moving MMX register */
1997 {8, 8}, /* cost of loading MMX registers
1998 in SImode and DImode */
1999 {8, 8}, /* cost of storing MMX registers
2000 in SImode and DImode */
2001 2, /* cost of moving SSE register */
2002 {8, 8, 8}, /* cost of loading SSE registers
2003 in SImode, DImode and TImode */
2004 {8, 8, 8}, /* cost of storing SSE registers
2005 in SImode, DImode and TImode */
2006 5, /* MMX or SSE register to integer */
2007 64, /* size of l1 cache. */
2008 512, /* size of l2 cache. */
2009 64, /* size of prefetch block */
2010 6, /* number of parallel prefetches */
2011 /* FIXME perhaps more appropriate value is 5. */
2012 3, /* Branch cost */
2013 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2014 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2015 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2016 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2017 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2018 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2019 core_memcpy,
2020 core_memset,
2021 1, /* scalar_stmt_cost. */
2022 1, /* scalar load_cost. */
2023 1, /* scalar_store_cost. */
2024 1, /* vec_stmt_cost. */
2025 1, /* vec_to_scalar_cost. */
2026 1, /* scalar_to_vec_cost. */
2027 1, /* vec_align_load_cost. */
2028 2, /* vec_unalign_load_cost. */
2029 1, /* vec_store_cost. */
2030 3, /* cond_taken_branch_cost. */
2031 1, /* cond_not_taken_branch_cost. */
2035 /* Set by -mtune. */
2036 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2038 /* Set by -mtune or -Os. */
2039 const struct processor_costs *ix86_cost = &pentium_cost;
2041 /* Processor feature/optimization bitmasks. */
2042 #define m_386 (1<<PROCESSOR_I386)
2043 #define m_486 (1<<PROCESSOR_I486)
2044 #define m_PENT (1<<PROCESSOR_PENTIUM)
2045 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2046 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2047 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2048 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2049 #define m_CORE2 (1<<PROCESSOR_CORE2)
2050 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2051 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2052 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2053 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2054 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2055 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2056 #define m_KNL (1<<PROCESSOR_KNL)
2057 #define m_INTEL (1<<PROCESSOR_INTEL)
2059 #define m_GEODE (1<<PROCESSOR_GEODE)
2060 #define m_K6 (1<<PROCESSOR_K6)
2061 #define m_K6_GEODE (m_K6 | m_GEODE)
2062 #define m_K8 (1<<PROCESSOR_K8)
2063 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2064 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2065 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2066 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2067 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2068 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2069 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2070 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2071 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2072 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2073 #define m_BTVER (m_BTVER1 | m_BTVER2)
2074 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2076 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2078 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2079 #undef DEF_TUNE
2080 #define DEF_TUNE(tune, name, selector) name,
2081 #include "x86-tune.def"
2082 #undef DEF_TUNE
2085 /* Feature tests against the various tunings. */
2086 unsigned char ix86_tune_features[X86_TUNE_LAST];
2088 /* Feature tests against the various tunings, used to create ix86_tune_features
2089 based on the processor mask. */
2090 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2091 #undef DEF_TUNE
2092 #define DEF_TUNE(tune, name, selector) selector,
2093 #include "x86-tune.def"
2094 #undef DEF_TUNE
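/* Illustrative sketch only: X86_TUNE_EXAMPLE below is a hypothetical entry;
   the real entries live in x86-tune.def.

     DEF_TUNE (X86_TUNE_EXAMPLE, "example_feature", m_CORE_ALL | m_GENERIC)

   Each selector is a bitmask built from the m_* processor masks above;
   set_ix86_tune_features later tests it against (1u << ix86_tune) to decide
   whether the feature is enabled for the CPU selected by -mtune.  */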
2097 /* Feature tests against the various architecture variations. */
2098 unsigned char ix86_arch_features[X86_ARCH_LAST];
2100 /* Feature tests against the various architecture variations, used to create
2101 ix86_arch_features based on the processor mask. */
2102 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2103 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2104 ~(m_386 | m_486 | m_PENT | m_K6),
2106 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2107 ~m_386,
2109 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2110 ~(m_386 | m_486),
2112 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2113 ~m_386,
2115 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2116 ~m_386,
2119 /* If the average insn count for a single function invocation is
2120 lower than this constant, emit fast (but longer) prologue and
2121 epilogue code. */
2122 #define FAST_PROLOGUE_INSN_COUNT 20
2124 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2125 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2126 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2127 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2129 /* Array of the smallest class containing reg number REGNO, indexed by
2130 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2132 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2134 /* ax, dx, cx, bx */
2135 AREG, DREG, CREG, BREG,
2136 /* si, di, bp, sp */
2137 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2138 /* FP registers */
2139 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2140 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2141 /* arg pointer */
2142 NON_Q_REGS,
2143 /* flags, fpsr, fpcr, frame */
2144 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2145 /* SSE registers */
2146 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2147 SSE_REGS, SSE_REGS,
2148 /* MMX registers */
2149 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2150 MMX_REGS, MMX_REGS,
2151 /* REX registers */
2152 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2153 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2154 /* SSE REX registers */
2155 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2156 SSE_REGS, SSE_REGS,
2157 /* AVX-512 SSE registers */
2158 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2159 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2160 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2161 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2162 /* Mask registers. */
2163 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2164 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2165 /* MPX bound registers */
2166 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2169 /* The "default" register map used in 32bit mode. */
2171 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2173 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2174 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2175 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2176 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2177 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2178 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2179 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2180 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2181 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2182 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2183 101, 102, 103, 104, /* bound registers */
2186 /* The "default" register map used in 64bit mode. */
2188 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2190 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2191 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2192 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2193 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2194 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2195 8,9,10,11,12,13,14,15, /* extended integer registers */
2196 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2197 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2198 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2199 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2200 126, 127, 128, 129, /* bound registers */
2203 /* Define the register numbers to be used in Dwarf debugging information.
2204 The SVR4 reference port C compiler uses the following register numbers
2205 in its Dwarf output code:
2206 0 for %eax (gcc regno = 0)
2207 1 for %ecx (gcc regno = 2)
2208 2 for %edx (gcc regno = 1)
2209 3 for %ebx (gcc regno = 3)
2210 4 for %esp (gcc regno = 7)
2211 5 for %ebp (gcc regno = 6)
2212 6 for %esi (gcc regno = 4)
2213 7 for %edi (gcc regno = 5)
2214 The following three DWARF register numbers are never generated by
2215 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2216 believes these numbers have these meanings.
2217 8 for %eip (no gcc equivalent)
2218 9 for %eflags (gcc regno = 17)
2219 10 for %trapno (no gcc equivalent)
2220 It is not at all clear how we should number the FP stack registers
2221 for the x86 architecture. If the version of SDB on x86/svr4 were
2222 a bit less brain dead with respect to floating-point then we would
2223 have a precedent to follow with respect to DWARF register numbers
2224 for x86 FP registers, but the SDB on x86/svr4 is so completely
2225 broken with respect to FP registers that it is hardly worth thinking
2226 of it as something to strive for compatibility with.
2227 The version of x86/svr4 SDB I have at the moment does (partially)
2228 seem to believe that DWARF register number 11 is associated with
2229 the x86 register %st(0), but that's about all. Higher DWARF
2230 register numbers don't seem to be associated with anything in
2231 particular, and even for DWARF regno 11, SDB only seems to under-
2232 stand that it should say that a variable lives in %st(0) (when
2233 asked via an `=' command) if we said it was in DWARF regno 11,
2234 but SDB still prints garbage when asked for the value of the
2235 variable in question (via a `/' command).
2236 (Also note that the labels SDB prints for various FP stack regs
2237 when doing an `x' command are all wrong.)
2238 Note that these problems generally don't affect the native SVR4
2239 C compiler because it doesn't allow the use of -O with -g and
2240 because when it is *not* optimizing, it allocates a memory
2241 location for each floating-point variable, and the memory
2242 location is what gets described in the DWARF AT_location
2243 attribute for the variable in question.
2244 Regardless of the severe mental illness of the x86/svr4 SDB, we
2245 do something sensible here and we use the following DWARF
2246 register numbers. Note that these are all stack-top-relative
2247 numbers.
2248 11 for %st(0) (gcc regno = 8)
2249 12 for %st(1) (gcc regno = 9)
2250 13 for %st(2) (gcc regno = 10)
2251 14 for %st(3) (gcc regno = 11)
2252 15 for %st(4) (gcc regno = 12)
2253 16 for %st(5) (gcc regno = 13)
2254 17 for %st(6) (gcc regno = 14)
2255 18 for %st(7) (gcc regno = 15)
2257 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2259 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2260 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2261 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2262 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2263 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2264 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2265 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2267 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2268 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2269 101, 102, 103, 104, /* bound registers */
2272 /* Define parameter passing and return registers. */
2274 static int const x86_64_int_parameter_registers[6] =
2276 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2279 static int const x86_64_ms_abi_int_parameter_registers[4] =
2281 CX_REG, DX_REG, R8_REG, R9_REG
2284 static int const x86_64_int_return_registers[4] =
2286 AX_REG, DX_REG, DI_REG, SI_REG
2289 /* Additional registers that are clobbered by SYSV calls. */
2291 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2293 SI_REG, DI_REG,
2294 XMM6_REG, XMM7_REG,
2295 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2296 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2299 /* Define the structure for the machine field in struct function. */
2301 struct GTY(()) stack_local_entry {
2302 unsigned short mode;
2303 unsigned short n;
2304 rtx rtl;
2305 struct stack_local_entry *next;
2308 /* Structure describing stack frame layout.
2309 Stack grows downward:
2311 [arguments]
2312 <- ARG_POINTER
2313 saved pc
2315 saved static chain if ix86_static_chain_on_stack
2317 saved frame pointer if frame_pointer_needed
2318 <- HARD_FRAME_POINTER
2319 [saved regs]
2320 <- regs_save_offset
2321 [padding0]
2323 [saved SSE regs]
2324 <- sse_regs_save_offset
2325 [padding1] |
2326 | <- FRAME_POINTER
2327 [va_arg registers] |
2329 [frame] |
2331 [padding2] | = to_allocate
2332 <- STACK_POINTER
2334 struct ix86_frame
2336 int nsseregs;
2337 int nregs;
2338 int va_arg_size;
2339 int red_zone_size;
2340 int outgoing_arguments_size;
2342 /* The offsets relative to ARG_POINTER. */
2343 HOST_WIDE_INT frame_pointer_offset;
2344 HOST_WIDE_INT hard_frame_pointer_offset;
2345 HOST_WIDE_INT stack_pointer_offset;
2346 HOST_WIDE_INT hfp_save_offset;
2347 HOST_WIDE_INT reg_save_offset;
2348 HOST_WIDE_INT sse_reg_save_offset;
2350 /* When save_regs_using_mov is set, emit prologue using
2351 move instead of push instructions. */
2352 bool save_regs_using_mov;
2355 /* Which cpu are we scheduling for. */
2356 enum attr_cpu ix86_schedule;
2358 /* Which cpu are we optimizing for. */
2359 enum processor_type ix86_tune;
2361 /* Which instruction set architecture to use. */
2362 enum processor_type ix86_arch;
2364 /* True if processor has SSE prefetch instruction. */
2365 unsigned char x86_prefetch_sse;
2367 /* -mstackrealign option */
2368 static const char ix86_force_align_arg_pointer_string[]
2369 = "force_align_arg_pointer";
2371 static rtx (*ix86_gen_leave) (void);
2372 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2373 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2374 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2375 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2376 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2377 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2378 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2379 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2380 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2381 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2382 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2384 /* Preferred alignment for stack boundary in bits. */
2385 unsigned int ix86_preferred_stack_boundary;
2387 /* Alignment for incoming stack boundary in bits specified at
2388 command line. */
2389 static unsigned int ix86_user_incoming_stack_boundary;
2391 /* Default alignment for incoming stack boundary in bits. */
2392 static unsigned int ix86_default_incoming_stack_boundary;
2394 /* Alignment for incoming stack boundary in bits. */
2395 unsigned int ix86_incoming_stack_boundary;
2397 /* Calling abi specific va_list type nodes. */
2398 static GTY(()) tree sysv_va_list_type_node;
2399 static GTY(()) tree ms_va_list_type_node;
2401 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2402 char internal_label_prefix[16];
2403 int internal_label_prefix_len;
2405 /* Fence to use after loop using movnt. */
2406 tree x86_mfence;
2408 /* Register class used for passing a given 64-bit part of the argument.
2409    These represent the classes documented by the psABI, with the exception of
2410    the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2411    uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2413    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2414    whenever possible (the upper half contains only padding). */
2415 enum x86_64_reg_class
2417 X86_64_NO_CLASS,
2418 X86_64_INTEGER_CLASS,
2419 X86_64_INTEGERSI_CLASS,
2420 X86_64_SSE_CLASS,
2421 X86_64_SSESF_CLASS,
2422 X86_64_SSEDF_CLASS,
2423 X86_64_SSEUP_CLASS,
2424 X86_64_X87_CLASS,
2425 X86_64_X87UP_CLASS,
2426 X86_64_COMPLEX_X87_CLASS,
2427 X86_64_MEMORY_CLASS
2430 #define MAX_CLASSES 8
2432 /* Table of constants used by fldpi, fldln2, etc. */
2433 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2434 static bool ext_80387_constants_init = 0;
2437 static struct machine_function * ix86_init_machine_status (void);
2438 static rtx ix86_function_value (const_tree, const_tree, bool);
2439 static bool ix86_function_value_regno_p (const unsigned int);
2440 static unsigned int ix86_function_arg_boundary (machine_mode,
2441 const_tree);
2442 static rtx ix86_static_chain (const_tree, bool);
2443 static int ix86_function_regparm (const_tree, const_tree);
2444 static void ix86_compute_frame_layout (struct ix86_frame *);
2445 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2446 rtx, rtx, int);
2447 static void ix86_add_new_builtins (HOST_WIDE_INT);
2448 static tree ix86_canonical_va_list_type (tree);
2449 static void predict_jump (int);
2450 static unsigned int split_stack_prologue_scratch_regno (void);
2451 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2453 enum ix86_function_specific_strings
2455 IX86_FUNCTION_SPECIFIC_ARCH,
2456 IX86_FUNCTION_SPECIFIC_TUNE,
2457 IX86_FUNCTION_SPECIFIC_MAX
2460 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2461 const char *, enum fpmath_unit, bool);
2462 static void ix86_function_specific_save (struct cl_target_option *,
2463 struct gcc_options *opts);
2464 static void ix86_function_specific_restore (struct gcc_options *opts,
2465 struct cl_target_option *);
2466 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2467 static void ix86_function_specific_print (FILE *, int,
2468 struct cl_target_option *);
2469 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2470 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2471 struct gcc_options *,
2472 struct gcc_options *,
2473 struct gcc_options *);
2474 static bool ix86_can_inline_p (tree, tree);
2475 static void ix86_set_current_function (tree);
2476 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2478 static enum calling_abi ix86_function_abi (const_tree);
2481 #ifndef SUBTARGET32_DEFAULT_CPU
2482 #define SUBTARGET32_DEFAULT_CPU "i386"
2483 #endif
2485 /* Whether -mtune= or -march= were specified. */
2486 static int ix86_tune_defaulted;
2487 static int ix86_arch_specified;
2489 /* Vectorization library interface and handlers. */
2490 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2492 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2493 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2495 /* Processor target table, indexed by processor number */
2496 struct ptt
2498 const char *const name; /* processor name */
2499 const struct processor_costs *cost; /* Processor costs */
2500 const int align_loop; /* Default alignments. */
2501 const int align_loop_max_skip;
2502 const int align_jump;
2503 const int align_jump_max_skip;
2504 const int align_func;
2507 /* This table must be in sync with enum processor_type in i386.h. */
2508 static const struct ptt processor_target_table[PROCESSOR_max] =
2510 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2511 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2512 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2513 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2514 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2515 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2516 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2517 {"core2", &core_cost, 16, 10, 16, 10, 16},
2518 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2519 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2520 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2521 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2522 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2523 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2524 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2525 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2526 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2527 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2528 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2529 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2530 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2531 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2532 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2533 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2534 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2535 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2538 static unsigned int
2539 rest_of_handle_insert_vzeroupper (void)
2541 int i;
2543 /* vzeroupper instructions are inserted immediately after reload to
2544 account for possible spills from 256-bit registers. The pass
2545 reuses the mode switching infrastructure by re-running the mode
2546 insertion pass, so disable entities that have already been processed. */
2547 for (i = 0; i < MAX_386_ENTITIES; i++)
2548 ix86_optimize_mode_switching[i] = 0;
2550 ix86_optimize_mode_switching[AVX_U128] = 1;
2552 /* Call optimize_mode_switching. */
2553 g->get_passes ()->execute_pass_mode_switching ();
2554 return 0;
2557 namespace {
2559 const pass_data pass_data_insert_vzeroupper =
2561 RTL_PASS, /* type */
2562 "vzeroupper", /* name */
2563 OPTGROUP_NONE, /* optinfo_flags */
2564 TV_NONE, /* tv_id */
2565 0, /* properties_required */
2566 0, /* properties_provided */
2567 0, /* properties_destroyed */
2568 0, /* todo_flags_start */
2569 TODO_df_finish, /* todo_flags_finish */
2572 class pass_insert_vzeroupper : public rtl_opt_pass
2574 public:
2575 pass_insert_vzeroupper(gcc::context *ctxt)
2576 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2579 /* opt_pass methods: */
2580 virtual bool gate (function *)
2582 return TARGET_AVX && !TARGET_AVX512F
2583 && TARGET_VZEROUPPER && flag_expensive_optimizations
2584 && !optimize_size;
2587 virtual unsigned int execute (function *)
2589 return rest_of_handle_insert_vzeroupper ();
2592 }; // class pass_insert_vzeroupper
2594 } // anon namespace
2596 rtl_opt_pass *
2597 make_pass_insert_vzeroupper (gcc::context *ctxt)
2599 return new pass_insert_vzeroupper (ctxt);
2602 /* Return true if a red-zone is in use. */
2604 static inline bool
2605 ix86_using_red_zone (void)
2607 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2610 /* Return a string that documents the current -m options. The caller is
2611 responsible for freeing the string. */
2613 static char *
2614 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2615 const char *tune, enum fpmath_unit fpmath,
2616 bool add_nl_p)
2618 struct ix86_target_opts
2620 const char *option; /* option string */
2621 HOST_WIDE_INT mask; /* isa mask options */
2624 /* This table is ordered so that options like -msse4.2 that imply
2625 preceding options are matched first. */
2626 static struct ix86_target_opts isa_opts[] =
2628 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2629 { "-mfma", OPTION_MASK_ISA_FMA },
2630 { "-mxop", OPTION_MASK_ISA_XOP },
2631 { "-mlwp", OPTION_MASK_ISA_LWP },
2632 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2633 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2634 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2635 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2636 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2637 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2638 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2639 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2640 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2641 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2642 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2643 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2644 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2645 { "-msse3", OPTION_MASK_ISA_SSE3 },
2646 { "-msse2", OPTION_MASK_ISA_SSE2 },
2647 { "-msse", OPTION_MASK_ISA_SSE },
2648 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2649 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2650 { "-mmmx", OPTION_MASK_ISA_MMX },
2651 { "-mabm", OPTION_MASK_ISA_ABM },
2652 { "-mbmi", OPTION_MASK_ISA_BMI },
2653 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2654 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2655 { "-mhle", OPTION_MASK_ISA_HLE },
2656 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2657 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2658 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2659 { "-madx", OPTION_MASK_ISA_ADX },
2660 { "-mtbm", OPTION_MASK_ISA_TBM },
2661 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2662 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2663 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2664 { "-maes", OPTION_MASK_ISA_AES },
2665 { "-msha", OPTION_MASK_ISA_SHA },
2666 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2667 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2668 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2669 { "-mf16c", OPTION_MASK_ISA_F16C },
2670 { "-mrtm", OPTION_MASK_ISA_RTM },
2671 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2672 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2673 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2674 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2675 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2676 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2677 { "-mmpx", OPTION_MASK_ISA_MPX },
2678 { "-mclwb", OPTION_MASK_ISA_CLWB },
2679 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
2682 /* Flag options. */
2683 static struct ix86_target_opts flag_opts[] =
2685 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2686 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2687 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2688 { "-m80387", MASK_80387 },
2689 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2690 { "-malign-double", MASK_ALIGN_DOUBLE },
2691 { "-mcld", MASK_CLD },
2692 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2693 { "-mieee-fp", MASK_IEEE_FP },
2694 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2695 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2696 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2697 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2698 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2699 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2700 { "-mno-red-zone", MASK_NO_RED_ZONE },
2701 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2702 { "-mrecip", MASK_RECIP },
2703 { "-mrtd", MASK_RTD },
2704 { "-msseregparm", MASK_SSEREGPARM },
2705 { "-mstack-arg-probe", MASK_STACK_PROBE },
2706 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2707 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2708 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2709 { "-mvzeroupper", MASK_VZEROUPPER },
2710 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2711 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2712 { "-mprefer-avx128", MASK_PREFER_AVX128},
2715 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2717 char isa_other[40];
2718 char target_other[40];
2719 unsigned num = 0;
2720 unsigned i, j;
2721 char *ret;
2722 char *ptr;
2723 size_t len;
2724 size_t line_len;
2725 size_t sep_len;
2726 const char *abi;
2728 memset (opts, '\0', sizeof (opts));
2730 /* Add -march= option. */
2731 if (arch)
2733 opts[num][0] = "-march=";
2734 opts[num++][1] = arch;
2737 /* Add -mtune= option. */
2738 if (tune)
2740 opts[num][0] = "-mtune=";
2741 opts[num++][1] = tune;
2744 /* Add -m32/-m64/-mx32. */
2745 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2747 if ((isa & OPTION_MASK_ABI_64) != 0)
2748 abi = "-m64";
2749 else
2750 abi = "-mx32";
2751 isa &= ~ (OPTION_MASK_ISA_64BIT
2752 | OPTION_MASK_ABI_64
2753 | OPTION_MASK_ABI_X32);
2755 else
2756 abi = "-m32";
2757 opts[num++][0] = abi;
2759 /* Pick out the options in isa options. */
2760 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2762 if ((isa & isa_opts[i].mask) != 0)
2764 opts[num++][0] = isa_opts[i].option;
2765 isa &= ~ isa_opts[i].mask;
2769 if (isa && add_nl_p)
2771 opts[num++][0] = isa_other;
2772 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2773 isa);
2776 /* Add flag options. */
2777 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2779 if ((flags & flag_opts[i].mask) != 0)
2781 opts[num++][0] = flag_opts[i].option;
2782 flags &= ~ flag_opts[i].mask;
2786 if (flags && add_nl_p)
2788 opts[num++][0] = target_other;
2789 sprintf (target_other, "(other flags: %#x)", flags);
2792 /* Add -fpmath= option. */
2793 if (fpmath)
2795 opts[num][0] = "-mfpmath=";
2796 switch ((int) fpmath)
2798 case FPMATH_387:
2799 opts[num++][1] = "387";
2800 break;
2802 case FPMATH_SSE:
2803 opts[num++][1] = "sse";
2804 break;
2806 case FPMATH_387 | FPMATH_SSE:
2807 opts[num++][1] = "sse+387";
2808 break;
2810 default:
2811 gcc_unreachable ();
2815 /* Any options? */
2816 if (num == 0)
2817 return NULL;
2819 gcc_assert (num < ARRAY_SIZE (opts));
2821 /* Size the string. */
2822 len = 0;
2823 sep_len = (add_nl_p) ? 3 : 1;
2824 for (i = 0; i < num; i++)
2826 len += sep_len;
2827 for (j = 0; j < 2; j++)
2828 if (opts[i][j])
2829 len += strlen (opts[i][j]);
2832 /* Build the string. */
2833 ret = ptr = (char *) xmalloc (len);
2834 line_len = 0;
2836 for (i = 0; i < num; i++)
2838 size_t len2[2];
2840 for (j = 0; j < 2; j++)
2841 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2843 if (i != 0)
2845 *ptr++ = ' ';
2846 line_len++;
2848 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2850 *ptr++ = '\\';
2851 *ptr++ = '\n';
2852 line_len = 0;
2856 for (j = 0; j < 2; j++)
2857 if (opts[i][j])
2859 memcpy (ptr, opts[i][j], len2[j]);
2860 ptr += len2[j];
2861 line_len += len2[j];
2865 *ptr = '\0';
2866 gcc_assert (ret + len >= ptr);
2868 return ret;
2871 /* Return true if profiling code should be emitted before the
2872 prologue, and false otherwise.
2873 Note: for x86 this is the -mfentry ("hotfix") case. */
2874 static bool
2875 ix86_profile_before_prologue (void)
2877 return flag_fentry != 0;
2880 /* Function that is callable from the debugger to print the current
2881 options. */
2882 void ATTRIBUTE_UNUSED
2883 ix86_debug_options (void)
2885 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2886 ix86_arch_string, ix86_tune_string,
2887 ix86_fpmath, true);
2889 if (opts)
2891 fprintf (stderr, "%s\n\n", opts);
2892 free (opts);
2894 else
2895 fputs ("<no options>\n\n", stderr);
2897 return;
2900 static const char *stringop_alg_names[] = {
2901 #define DEF_ENUM
2902 #define DEF_ALG(alg, name) #name,
2903 #include "stringop.def"
2904 #undef DEF_ENUM
2905 #undef DEF_ALG
2908 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2909 The string is of the following form (or a comma-separated list of such entries):
2911 strategy_alg:max_size:[align|noalign]
2913 where the full size range for the strategy is either [0, max_size] or
2914 [min_size, max_size], in which min_size is the max_size + 1 of the
2915 preceding range. The last size range must have max_size == -1.
2917 Examples:
2920 -mmemcpy-strategy=libcall:-1:noalign
2922 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2926 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2928 This is to tell the compiler to use the following strategy for memset
2929 1) when the expected size is between [1, 16], use rep_8byte strategy;
2930 2) when the size is between [17, 2048], use vector_loop;
2931 3) when the size is > 2048, use libcall. */
2933 struct stringop_size_range
2935 int max;
2936 stringop_alg alg;
2937 bool noalign;
2940 static void
2941 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2943 const struct stringop_algs *default_algs;
2944 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2945 char *curr_range_str, *next_range_str;
2946 int i = 0, n = 0;
2948 if (is_memset)
2949 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2950 else
2951 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2953 curr_range_str = strategy_str;
2957 int maxs;
2958 char alg_name[128];
2959 char align[16];
2960 next_range_str = strchr (curr_range_str, ',');
2961 if (next_range_str)
2962 *next_range_str++ = '\0';
2964 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2965 alg_name, &maxs, align))
2967 error ("wrong arg %s to option %s", curr_range_str,
2968 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2969 return;
2972 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2974 error ("size ranges of option %s should be increasing",
2975 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2976 return;
2979 for (i = 0; i < last_alg; i++)
2980 if (!strcmp (alg_name, stringop_alg_names[i]))
2981 break;
2983 if (i == last_alg)
2985 error ("wrong stringop strategy name %s specified for option %s",
2986 alg_name,
2987 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2988 return;
2991 input_ranges[n].max = maxs;
2992 input_ranges[n].alg = (stringop_alg) i;
2993 if (!strcmp (align, "align"))
2994 input_ranges[n].noalign = false;
2995 else if (!strcmp (align, "noalign"))
2996 input_ranges[n].noalign = true;
2997 else
2999 error ("unknown alignment %s specified for option %s",
3000 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3001 return;
3003 n++;
3004 curr_range_str = next_range_str;
3006 while (curr_range_str);
3008 if (input_ranges[n - 1].max != -1)
3010 error ("the max value for the last size range should be -1"
3011 " for option %s",
3012 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3013 return;
3016 if (n > MAX_STRINGOP_ALGS)
3018 error ("too many size ranges specified in option %s",
3019 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3020 return;
3023 /* Now override the default algs array. */
3024 for (i = 0; i < n; i++)
3026 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3027 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3028 = input_ranges[i].alg;
3029 *const_cast<int *>(&default_algs->size[i].noalign)
3030 = input_ranges[i].noalign;
3035 /* Parse the -mtune-ctrl= option. When DUMP is true,
3036 print the features that are explicitly set. */
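/* A hedged usage sketch: given -mtune-ctrl=use_incdec,^pad_returns (feature
   names illustrative; the accepted set is ix86_tune_feature_names, generated
   from x86-tune.def), the loop below sets the first feature, and the '^'
   prefix on the second makes CLEAR true so that feature is turned off; with
   DUMP each decision is printed to stderr.  */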
3038 static void
3039 parse_mtune_ctrl_str (bool dump)
3041 if (!ix86_tune_ctrl_string)
3042 return;
3044 char *next_feature_string = NULL;
3045 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3046 char *orig = curr_feature_string;
3047 int i;
3050 bool clear = false;
3052 next_feature_string = strchr (curr_feature_string, ',');
3053 if (next_feature_string)
3054 *next_feature_string++ = '\0';
3055 if (*curr_feature_string == '^')
3057 curr_feature_string++;
3058 clear = true;
3060 for (i = 0; i < X86_TUNE_LAST; i++)
3062 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3064 ix86_tune_features[i] = !clear;
3065 if (dump)
3066 fprintf (stderr, "Explicitly %s feature %s\n",
3067 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3068 break;
3071 if (i == X86_TUNE_LAST)
3072 error ("Unknown parameter to option -mtune-ctrl: %s",
3073 clear ? curr_feature_string - 1 : curr_feature_string);
3074 curr_feature_string = next_feature_string;
3076 while (curr_feature_string);
3077 free (orig);
3080 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3081 processor type. */
3083 static void
3084 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3086 unsigned int ix86_tune_mask = 1u << ix86_tune;
3087 int i;
3089 for (i = 0; i < X86_TUNE_LAST; ++i)
3091 if (ix86_tune_no_default)
3092 ix86_tune_features[i] = 0;
3093 else
3094 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3097 if (dump)
3099 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3100 for (i = 0; i < X86_TUNE_LAST; i++)
3101 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3102 ix86_tune_features[i] ? "on" : "off");
3105 parse_mtune_ctrl_str (dump);
3109 /* Override various settings based on options. If MAIN_ARGS_P, the
3110 options are from the command line, otherwise they are from
3111 attributes. */
3113 static void
3114 ix86_option_override_internal (bool main_args_p,
3115 struct gcc_options *opts,
3116 struct gcc_options *opts_set)
3118 int i;
3119 unsigned int ix86_arch_mask;
3120 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3121 const char *prefix;
3122 const char *suffix;
3123 const char *sw;
3125 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3126 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3127 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3128 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3129 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3130 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3131 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3132 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3133 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3134 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3135 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3136 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3137 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3138 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3139 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3140 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3141 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3142 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3143 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3144 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3145 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3146 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3147 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3148 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3149 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3150 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3151 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3152 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3153 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3154 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3155 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3156 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3157 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3158 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3159 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3160 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3161 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3162 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3163 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3164 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3165 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3166 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3167 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3168 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3169 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3170 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3171 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3172 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3173 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3174 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3175 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3176 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3177 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3178 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3179 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3180 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3181 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3183 #define PTA_CORE2 \
3184 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3185 | PTA_CX16 | PTA_FXSR)
3186 #define PTA_NEHALEM \
3187 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3188 #define PTA_WESTMERE \
3189 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3190 #define PTA_SANDYBRIDGE \
3191 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3192 #define PTA_IVYBRIDGE \
3193 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3194 #define PTA_HASWELL \
3195 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3196 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3197 #define PTA_BROADWELL \
3198 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3199 #define PTA_KNL \
3200 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3201 #define PTA_BONNELL \
3202 (PTA_CORE2 | PTA_MOVBE)
3203 #define PTA_SILVERMONT \
3204 (PTA_WESTMERE | PTA_MOVBE)
3206 /* If this reaches 64, we need to widen the struct pta flags field below.  */
3208 static struct pta
3210 const char *const name; /* processor name or nickname. */
3211 const enum processor_type processor;
3212 const enum attr_cpu schedule;
3213 const unsigned HOST_WIDE_INT flags;
3215 const processor_alias_table[] =
3217 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3218 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3219 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3220 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3221 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3222 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3223 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3224 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3225 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3226 PTA_MMX | PTA_SSE | PTA_FXSR},
3227 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3228 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3229 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3230 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3231 PTA_MMX | PTA_SSE | PTA_FXSR},
3232 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3233 PTA_MMX | PTA_SSE | PTA_FXSR},
3234 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3235 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3236 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3237 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3238 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3239 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3240 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3241 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3242 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3243 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3244 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3245 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3246 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3247 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3248 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3249 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3250 PTA_SANDYBRIDGE},
3251 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3252 PTA_SANDYBRIDGE},
3253 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3254 PTA_IVYBRIDGE},
3255 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3256 PTA_IVYBRIDGE},
3257 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3258 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3259 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3260 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3261 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3262 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3263 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3264 {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL},
3265 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3266 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3267 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3268 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3269 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3270 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3271 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3272 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3273 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3274 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3275 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3276 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3277 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3278 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3279 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3280 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3281 {"x86-64", PROCESSOR_K8, CPU_K8,
3282 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3283 {"k8", PROCESSOR_K8, CPU_K8,
3284 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3285 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3286 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3287 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3288 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3289 {"opteron", PROCESSOR_K8, CPU_K8,
3290 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3291 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3292 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3293 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3294 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3295 {"athlon64", PROCESSOR_K8, CPU_K8,
3296 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3297 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3298 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3299 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3300 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3301 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3302 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3303 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3304 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3305 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3306 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3307 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3308 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3309 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3310 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3311 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3312 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3313 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3314 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3315 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3316 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3317 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3318 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3319 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3320 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3321 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3322 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3323 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3324 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3325 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3326 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3327 | PTA_XSAVEOPT | PTA_FSGSBASE},
3328 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3329 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3330 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3331 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3332 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3333 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3334 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3335 | PTA_MOVBE},
3336 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3337 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3338 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3339 | PTA_FXSR | PTA_XSAVE},
3340 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3341 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3342 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3343 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3344 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3345 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3347 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3348 PTA_64BIT
3349 | PTA_HLE /* flags are only used for -march switch. */ },
3352 /* -mrecip options. */
3353 static struct
3355 const char *string; /* option name */
3356 unsigned int mask; /* mask bits to set */
3358 const recip_options[] =
3360 { "all", RECIP_MASK_ALL },
3361 { "none", RECIP_MASK_NONE },
3362 { "div", RECIP_MASK_DIV },
3363 { "sqrt", RECIP_MASK_SQRT },
3364 { "vec-div", RECIP_MASK_VEC_DIV },
3365 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3368 int const pta_size = ARRAY_SIZE (processor_alias_table);
3370 /* Set up prefix/suffix so the error messages refer to either the command
3371 line argument, or the attribute(target). */
3372 if (main_args_p)
3374 prefix = "-m";
3375 suffix = "";
3376 sw = "switch";
3378 else
3380 prefix = "option(\"";
3381 suffix = "\")";
3382 sw = "attribute";
3385 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3386 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3387 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3388 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3389 #ifdef TARGET_BI_ARCH
3390 else
3392 #if TARGET_BI_ARCH == 1
3393 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3394 is on and OPTION_MASK_ABI_X32 is off. We turn off
3395 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3396 -mx32. */
3397 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3398 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3399 #else
3400 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3401 on and OPTION_MASK_ABI_64 is off. We turn off
3402 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3403 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3404 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3405 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3406 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3407 #endif
3409 #endif
3411 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3413 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3414 OPTION_MASK_ABI_64 for TARGET_X32. */
3415 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3416 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3418 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3419 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3420 | OPTION_MASK_ABI_X32
3421 | OPTION_MASK_ABI_64);
3422 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3424 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3425 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3426 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3427 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3430 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3431 SUBTARGET_OVERRIDE_OPTIONS;
3432 #endif
3434 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3435 SUBSUBTARGET_OVERRIDE_OPTIONS;
3436 #endif
3438 /* -fPIC is the default for x86_64. */
3439 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3440 opts->x_flag_pic = 2;
3442 /* Need to check -mtune=generic first. */
3443 if (opts->x_ix86_tune_string)
3445 /* As special support for cross compilers we read -mtune=native
3446 as -mtune=generic. With native compilers we won't see the
3447 -mtune=native, as it was changed by the driver. */
3448 if (!strcmp (opts->x_ix86_tune_string, "native"))
3450 opts->x_ix86_tune_string = "generic";
3452 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3453 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3454 "%stune=k8%s or %stune=generic%s instead as appropriate",
3455 prefix, suffix, prefix, suffix, prefix, suffix);
3457 else
3459 if (opts->x_ix86_arch_string)
3460 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3461 if (!opts->x_ix86_tune_string)
3463 opts->x_ix86_tune_string
3464 = processor_target_table[TARGET_CPU_DEFAULT].name;
3465 ix86_tune_defaulted = 1;
3468 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3469 or defaulted. We need to use a sensible tune option. */
3470 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3472 opts->x_ix86_tune_string = "generic";
3476 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3477 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3479 /* rep; movq isn't available in 32-bit code. */
3480 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3481 opts->x_ix86_stringop_alg = no_stringop;
3484 if (!opts->x_ix86_arch_string)
3485 opts->x_ix86_arch_string
3486 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3487 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3488 else
3489 ix86_arch_specified = 1;
3491 if (opts_set->x_ix86_pmode)
3493 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3494 && opts->x_ix86_pmode == PMODE_SI)
3495 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3496 && opts->x_ix86_pmode == PMODE_DI))
3497 error ("address mode %qs not supported in the %s bit mode",
3498 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3499 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3501 else
3502 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3503 ? PMODE_DI : PMODE_SI;
3505 if (!opts_set->x_ix86_abi)
3506 opts->x_ix86_abi = DEFAULT_ABI;
3508   /* For targets using the MS ABI, enable ms-extensions if not
3509      explicitly turned off.  For non-MS-ABI targets we turn this
3510      option off.  */
3511 if (!opts_set->x_flag_ms_extensions)
3512 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3514 if (opts_set->x_ix86_cmodel)
3516 switch (opts->x_ix86_cmodel)
3518 case CM_SMALL:
3519 case CM_SMALL_PIC:
3520 if (opts->x_flag_pic)
3521 opts->x_ix86_cmodel = CM_SMALL_PIC;
3522 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3523 error ("code model %qs not supported in the %s bit mode",
3524 "small", "32");
3525 break;
3527 case CM_MEDIUM:
3528 case CM_MEDIUM_PIC:
3529 if (opts->x_flag_pic)
3530 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3531 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3532 error ("code model %qs not supported in the %s bit mode",
3533 "medium", "32");
3534 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3535 error ("code model %qs not supported in x32 mode",
3536 "medium");
3537 break;
3539 case CM_LARGE:
3540 case CM_LARGE_PIC:
3541 if (opts->x_flag_pic)
3542 opts->x_ix86_cmodel = CM_LARGE_PIC;
3543 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3544 error ("code model %qs not supported in the %s bit mode",
3545 "large", "32");
3546 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3547 error ("code model %qs not supported in x32 mode",
3548 "large");
3549 break;
3551 case CM_32:
3552 if (opts->x_flag_pic)
3553 error ("code model %s does not support PIC mode", "32");
3554 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3555 error ("code model %qs not supported in the %s bit mode",
3556 "32", "64");
3557 break;
3559 case CM_KERNEL:
3560 if (opts->x_flag_pic)
3562 error ("code model %s does not support PIC mode", "kernel");
3563 opts->x_ix86_cmodel = CM_32;
3565 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3566 error ("code model %qs not supported in the %s bit mode",
3567 "kernel", "32");
3568 break;
3570 default:
3571 gcc_unreachable ();
3574 else
3576 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3577 use of rip-relative addressing. This eliminates fixups that
3578 would otherwise be needed if this object is to be placed in a
3579 DLL, and is essentially just as efficient as direct addressing. */
3580 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3581 && (TARGET_RDOS || TARGET_PECOFF))
3582 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3583 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3584 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3585 else
3586 opts->x_ix86_cmodel = CM_32;
3588 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3590 error ("-masm=intel not supported in this configuration");
3591 opts->x_ix86_asm_dialect = ASM_ATT;
3593 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3594 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3595 sorry ("%i-bit mode not compiled in",
3596 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3598 for (i = 0; i < pta_size; i++)
3599 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3601 ix86_schedule = processor_alias_table[i].schedule;
3602 ix86_arch = processor_alias_table[i].processor;
3603 /* Default cpu tuning to the architecture. */
3604 ix86_tune = ix86_arch;
3606 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3607 && !(processor_alias_table[i].flags & PTA_64BIT))
3608 error ("CPU you selected does not support x86-64 "
3609 "instruction set");
3611 if (processor_alias_table[i].flags & PTA_MMX
3612 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3613 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3614 if (processor_alias_table[i].flags & PTA_3DNOW
3615 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3616 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3617 if (processor_alias_table[i].flags & PTA_3DNOW_A
3618 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3619 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3620 if (processor_alias_table[i].flags & PTA_SSE
3621 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3622 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3623 if (processor_alias_table[i].flags & PTA_SSE2
3624 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3625 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3626 if (processor_alias_table[i].flags & PTA_SSE3
3627 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3628 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3629 if (processor_alias_table[i].flags & PTA_SSSE3
3630 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3631 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3632 if (processor_alias_table[i].flags & PTA_SSE4_1
3633 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3634 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3635 if (processor_alias_table[i].flags & PTA_SSE4_2
3636 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3637 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3638 if (processor_alias_table[i].flags & PTA_AVX
3639 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3640 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3641 if (processor_alias_table[i].flags & PTA_AVX2
3642 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3643 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3644 if (processor_alias_table[i].flags & PTA_FMA
3645 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3646 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3647 if (processor_alias_table[i].flags & PTA_SSE4A
3648 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3649 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3650 if (processor_alias_table[i].flags & PTA_FMA4
3651 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3652 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3653 if (processor_alias_table[i].flags & PTA_XOP
3654 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3655 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3656 if (processor_alias_table[i].flags & PTA_LWP
3657 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3658 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3659 if (processor_alias_table[i].flags & PTA_ABM
3660 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3661 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3662 if (processor_alias_table[i].flags & PTA_BMI
3663 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3664 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3665 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3666 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3667 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3668 if (processor_alias_table[i].flags & PTA_TBM
3669 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3670 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3671 if (processor_alias_table[i].flags & PTA_BMI2
3672 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3673 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3674 if (processor_alias_table[i].flags & PTA_CX16
3675 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3676 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3677 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3678 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3679 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3680 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3681 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3682 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3683 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3684 if (processor_alias_table[i].flags & PTA_MOVBE
3685 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3686 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3687 if (processor_alias_table[i].flags & PTA_AES
3688 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3689 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3690 if (processor_alias_table[i].flags & PTA_SHA
3691 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3692 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3693 if (processor_alias_table[i].flags & PTA_PCLMUL
3694 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3695 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3696 if (processor_alias_table[i].flags & PTA_FSGSBASE
3697 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3698 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3699 if (processor_alias_table[i].flags & PTA_RDRND
3700 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3701 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3702 if (processor_alias_table[i].flags & PTA_F16C
3703 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3704 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3705 if (processor_alias_table[i].flags & PTA_RTM
3706 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3707 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3708 if (processor_alias_table[i].flags & PTA_HLE
3709 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3710 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3711 if (processor_alias_table[i].flags & PTA_PRFCHW
3712 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3713 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3714 if (processor_alias_table[i].flags & PTA_RDSEED
3715 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3716 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3717 if (processor_alias_table[i].flags & PTA_ADX
3718 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3719 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3720 if (processor_alias_table[i].flags & PTA_FXSR
3721 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3722 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3723 if (processor_alias_table[i].flags & PTA_XSAVE
3724 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3725 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3726 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3727 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3728 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3729 if (processor_alias_table[i].flags & PTA_AVX512F
3730 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3731 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3732 if (processor_alias_table[i].flags & PTA_AVX512ER
3733 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3734 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3735 if (processor_alias_table[i].flags & PTA_AVX512PF
3736 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3737 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3738 if (processor_alias_table[i].flags & PTA_AVX512CD
3739 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3740 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3741 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3742 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3743 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3744 if (processor_alias_table[i].flags & PTA_PCOMMIT
3745 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3746 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3747 if (processor_alias_table[i].flags & PTA_CLWB
3748 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3749 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3750 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3751 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3752 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3753 if (processor_alias_table[i].flags & PTA_XSAVEC
3754 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3755 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3756 if (processor_alias_table[i].flags & PTA_XSAVES
3757 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3758 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3759 if (processor_alias_table[i].flags & PTA_AVX512DQ
3760 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3761 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3762 if (processor_alias_table[i].flags & PTA_AVX512BW
3763 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3764 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3765 if (processor_alias_table[i].flags & PTA_AVX512VL
3766 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3767 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3768 if (processor_alias_table[i].flags & PTA_MPX
3769 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3770 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3771 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3772 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3773 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3774 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3775 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3776 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3777 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3778 x86_prefetch_sse = true;
3780 break;
3783 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3784 error ("Intel MPX does not support x32");
3786 if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX))
3787 error ("Intel MPX does not support x32");
3789 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3790 error ("generic CPU can be used only for %stune=%s %s",
3791 prefix, suffix, sw);
3792 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3793 error ("intel CPU can be used only for %stune=%s %s",
3794 prefix, suffix, sw);
3795 else if (i == pta_size)
3796 error ("bad value (%s) for %sarch=%s %s",
3797 opts->x_ix86_arch_string, prefix, suffix, sw);
3799 ix86_arch_mask = 1u << ix86_arch;
3800 for (i = 0; i < X86_ARCH_LAST; ++i)
3801 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3803 for (i = 0; i < pta_size; i++)
3804 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3806 ix86_schedule = processor_alias_table[i].schedule;
3807 ix86_tune = processor_alias_table[i].processor;
3808 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3810 if (!(processor_alias_table[i].flags & PTA_64BIT))
3812 if (ix86_tune_defaulted)
3814 opts->x_ix86_tune_string = "x86-64";
3815 for (i = 0; i < pta_size; i++)
3816 if (! strcmp (opts->x_ix86_tune_string,
3817 processor_alias_table[i].name))
3818 break;
3819 ix86_schedule = processor_alias_table[i].schedule;
3820 ix86_tune = processor_alias_table[i].processor;
3822 else
3823 error ("CPU you selected does not support x86-64 "
3824 "instruction set");
3827 /* Intel CPUs have always interpreted SSE prefetch instructions as
3828 NOPs; so, we can enable SSE prefetch instructions even when
3829 -mtune (rather than -march) points us to a processor that has them.
3830 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3831 higher processors. */
3832 if (TARGET_CMOV
3833 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3834 x86_prefetch_sse = true;
3835 break;
3838 if (ix86_tune_specified && i == pta_size)
3839 error ("bad value (%s) for %stune=%s %s",
3840 opts->x_ix86_tune_string, prefix, suffix, sw);
3842 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3844 #ifndef USE_IX86_FRAME_POINTER
3845 #define USE_IX86_FRAME_POINTER 0
3846 #endif
3848 #ifndef USE_X86_64_FRAME_POINTER
3849 #define USE_X86_64_FRAME_POINTER 0
3850 #endif
3852 /* Set the default values for switches whose default depends on TARGET_64BIT
3853 in case they weren't overwritten by command line options. */
3854 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3856 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3857 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3858 if (opts->x_flag_asynchronous_unwind_tables
3859 && !opts_set->x_flag_unwind_tables
3860 && TARGET_64BIT_MS_ABI)
3861 opts->x_flag_unwind_tables = 1;
3862 if (opts->x_flag_asynchronous_unwind_tables == 2)
3863 opts->x_flag_unwind_tables
3864 = opts->x_flag_asynchronous_unwind_tables = 1;
3865 if (opts->x_flag_pcc_struct_return == 2)
3866 opts->x_flag_pcc_struct_return = 0;
3868 else
3870 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3871 opts->x_flag_omit_frame_pointer
3872 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3873 if (opts->x_flag_asynchronous_unwind_tables == 2)
3874 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3875 if (opts->x_flag_pcc_struct_return == 2)
3876 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3879 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3880   /* TODO: ix86_cost should be chosen at instruction or function granularity
3881 so for cold code we use size_cost even in !optimize_size compilation. */
3882 if (opts->x_optimize_size)
3883 ix86_cost = &ix86_size_cost;
3884 else
3885 ix86_cost = ix86_tune_cost;
3887 /* Arrange to set up i386_stack_locals for all functions. */
3888 init_machine_status = ix86_init_machine_status;
3890 /* Validate -mregparm= value. */
3891 if (opts_set->x_ix86_regparm)
3893 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3894 warning (0, "-mregparm is ignored in 64-bit mode");
3895 if (opts->x_ix86_regparm > REGPARM_MAX)
3897 error ("-mregparm=%d is not between 0 and %d",
3898 opts->x_ix86_regparm, REGPARM_MAX);
3899 opts->x_ix86_regparm = 0;
3902 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3903 opts->x_ix86_regparm = REGPARM_MAX;
3905 /* Default align_* from the processor table. */
3906 if (opts->x_align_loops == 0)
3908 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3909 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3911 if (opts->x_align_jumps == 0)
3913 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3914 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3916 if (opts->x_align_functions == 0)
3918 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3921 /* Provide default for -mbranch-cost= value. */
3922 if (!opts_set->x_ix86_branch_cost)
3923 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
3925 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3927 opts->x_target_flags
3928 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3930 /* Enable by default the SSE and MMX builtins. Do allow the user to
3931 explicitly disable any of these. In particular, disabling SSE and
3932 MMX for kernel code is extremely useful. */
3933 if (!ix86_arch_specified)
3934 opts->x_ix86_isa_flags
3935 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3936 | TARGET_SUBTARGET64_ISA_DEFAULT)
3937 & ~opts->x_ix86_isa_flags_explicit);
3939 if (TARGET_RTD_P (opts->x_target_flags))
3940 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3942 else
3944 opts->x_target_flags
3945 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3947 if (!ix86_arch_specified)
3948 opts->x_ix86_isa_flags
3949 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3951       /* The i386 ABI does not specify a red zone.  It still makes sense to use
3952          one when the programmer takes care to keep the stack from being destroyed.  */
3953 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3954 opts->x_target_flags |= MASK_NO_RED_ZONE;
3957 /* Keep nonleaf frame pointers. */
3958 if (opts->x_flag_omit_frame_pointer)
3959 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3960 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3961 opts->x_flag_omit_frame_pointer = 1;
3963 /* If we're doing fast math, we don't care about comparison order
3964 wrt NaNs. This lets us use a shorter comparison sequence. */
3965 if (opts->x_flag_finite_math_only)
3966 opts->x_target_flags &= ~MASK_IEEE_FP;
3968 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3969 since the insns won't need emulation. */
3970 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3971 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3973 /* Likewise, if the target doesn't have a 387, or we've specified
3974 software floating point, don't use 387 inline intrinsics. */
3975 if (!TARGET_80387_P (opts->x_target_flags))
3976 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3978 /* Turn on MMX builtins for -msse. */
3979 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3980 opts->x_ix86_isa_flags
3981 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3983 /* Enable SSE prefetch. */
3984 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3985 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3986 x86_prefetch_sse = true;
3988 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3989 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3990 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3991 opts->x_ix86_isa_flags
3992 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3994 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3995 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3996 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3997 opts->x_ix86_isa_flags
3998 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4000 /* Enable lzcnt instruction for -mabm. */
4001 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
4002 opts->x_ix86_isa_flags
4003 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4005 /* Validate -mpreferred-stack-boundary= value or default it to
4006 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4007 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4008 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4010 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4011 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4012 int max = (TARGET_SEH ? 4 : 12);
4014 if (opts->x_ix86_preferred_stack_boundary_arg < min
4015 || opts->x_ix86_preferred_stack_boundary_arg > max)
4017 if (min == max)
4018 error ("-mpreferred-stack-boundary is not supported "
4019 "for this target");
4020 else
4021 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4022 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4024 else
4025 ix86_preferred_stack_boundary
4026 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4029 /* Set the default value for -mstackrealign. */
4030 if (opts->x_ix86_force_align_arg_pointer == -1)
4031 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4033 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4035 /* Validate -mincoming-stack-boundary= value or default it to
4036 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4037 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4038 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4040 if (opts->x_ix86_incoming_stack_boundary_arg
4041 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
4042 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4043 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4044 opts->x_ix86_incoming_stack_boundary_arg,
4045 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
4046 else
4048 ix86_user_incoming_stack_boundary
4049 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4050 ix86_incoming_stack_boundary
4051 = ix86_user_incoming_stack_boundary;
4055 #ifndef NO_PROFILE_COUNTERS
4056 if (flag_nop_mcount)
4057 error ("-mnop-mcount is not compatible with this target");
4058 #endif
4059 if (flag_nop_mcount && flag_pic)
4060 error ("-mnop-mcount is not implemented for -fPIC");
4062 /* Accept -msseregparm only if at least SSE support is enabled. */
4063 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4064 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4065 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4067 if (opts_set->x_ix86_fpmath)
4069 if (opts->x_ix86_fpmath & FPMATH_SSE)
4071 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4073 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4074 opts->x_ix86_fpmath = FPMATH_387;
4076 else if ((opts->x_ix86_fpmath & FPMATH_387)
4077 && !TARGET_80387_P (opts->x_target_flags))
4079 warning (0, "387 instruction set disabled, using SSE arithmetics");
4080 opts->x_ix86_fpmath = FPMATH_SSE;
4084   /* For all chips supporting SSE2, -mfpmath=sse performs better than
4085      -mfpmath=387.  The latter is however the default on many targets, since
4086      the extra 80-bit precision of temporaries is considered part of the ABI.
4087      Override the default at least for -ffast-math.
4088      TODO: -mfpmath=both seems to produce similarly performing code with
4089      slightly smaller binaries.  It is however not clear if register
4090      allocation is ready for this setting.
4091      Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4092      codegen.  We may switch to 387 with -ffast-math for size-optimized
4093      functions.  */
4094 else if (fast_math_flags_set_p (&global_options)
4095 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4096 opts->x_ix86_fpmath = FPMATH_SSE;
4097 else
4098 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4100 /* If the i387 is disabled, then do not return values in it. */
4101 if (!TARGET_80387_P (opts->x_target_flags))
4102 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4104 /* Use external vectorized library in vectorizing intrinsics. */
4105 if (opts_set->x_ix86_veclibabi_type)
4106 switch (opts->x_ix86_veclibabi_type)
4108 case ix86_veclibabi_type_svml:
4109 ix86_veclib_handler = ix86_veclibabi_svml;
4110 break;
4112 case ix86_veclibabi_type_acml:
4113 ix86_veclib_handler = ix86_veclibabi_acml;
4114 break;
4116 default:
4117 gcc_unreachable ();
4120 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4121 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4122 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4124 /* If stack probes are required, the space used for large function
4125 arguments on the stack must also be probed, so enable
4126 -maccumulate-outgoing-args so this happens in the prologue. */
4127 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4128 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4130 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4131 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4132 "for correctness", prefix, suffix);
4133 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4136 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4138 char *p;
4139 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4140 p = strchr (internal_label_prefix, 'X');
4141 internal_label_prefix_len = p - internal_label_prefix;
4142 *p = '\0';
4145   /* When the scheduling description is not available, disable the scheduler
4146      pass so it won't slow down the compilation and make x87 code slower.  */
4147 if (!TARGET_SCHEDULE)
4148 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4150 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4151 ix86_tune_cost->simultaneous_prefetches,
4152 opts->x_param_values,
4153 opts_set->x_param_values);
4154 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4155 ix86_tune_cost->prefetch_block,
4156 opts->x_param_values,
4157 opts_set->x_param_values);
4158 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4159 ix86_tune_cost->l1_cache_size,
4160 opts->x_param_values,
4161 opts_set->x_param_values);
4162 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4163 ix86_tune_cost->l2_cache_size,
4164 opts->x_param_values,
4165 opts_set->x_param_values);
4167   /* Enable software prefetching at -O3 for CPUs where prefetching is helpful.  */
4168 if (opts->x_flag_prefetch_loop_arrays < 0
4169 && HAVE_prefetch
4170 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4171 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4172 opts->x_flag_prefetch_loop_arrays = 1;
4174 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4175      can be optimized to ap = __builtin_next_arg (0).  */
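/* A hedged illustration: in a hypothetical 32-bit varargs function
   void f (int last, ...), va_start (ap, last) can then be lowered to the
   plain pointer assignment ap = __builtin_next_arg (0), because the
   char *va_list needs no target-specific setup once the hook is cleared.  */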
4176 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4177 targetm.expand_builtin_va_start = NULL;
4179 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4181 ix86_gen_leave = gen_leave_rex64;
4182 if (Pmode == DImode)
4184 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4185 ix86_gen_tls_local_dynamic_base_64
4186 = gen_tls_local_dynamic_base_64_di;
4188 else
4190 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4191 ix86_gen_tls_local_dynamic_base_64
4192 = gen_tls_local_dynamic_base_64_si;
4195 else
4196 ix86_gen_leave = gen_leave;
4198 if (Pmode == DImode)
4200 ix86_gen_add3 = gen_adddi3;
4201 ix86_gen_sub3 = gen_subdi3;
4202 ix86_gen_sub3_carry = gen_subdi3_carry;
4203 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4204 ix86_gen_andsp = gen_anddi3;
4205 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4206 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4207 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4208 ix86_gen_monitor = gen_sse3_monitor_di;
4210 else
4212 ix86_gen_add3 = gen_addsi3;
4213 ix86_gen_sub3 = gen_subsi3;
4214 ix86_gen_sub3_carry = gen_subsi3_carry;
4215 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4216 ix86_gen_andsp = gen_andsi3;
4217 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4218 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4219 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4220 ix86_gen_monitor = gen_sse3_monitor_si;
4223 #ifdef USE_IX86_CLD
4224 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4225 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4226 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4227 #endif
4229 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4231 if (opts->x_flag_fentry > 0)
4232 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4233 "with -fpic");
4234 opts->x_flag_fentry = 0;
4236 else if (TARGET_SEH)
4238 if (opts->x_flag_fentry == 0)
4239 sorry ("-mno-fentry isn%'t compatible with SEH");
4240 opts->x_flag_fentry = 1;
4242 else if (opts->x_flag_fentry < 0)
4244 #if defined(PROFILE_BEFORE_PROLOGUE)
4245 opts->x_flag_fentry = 1;
4246 #else
4247 opts->x_flag_fentry = 0;
4248 #endif
4251 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4252 opts->x_target_flags |= MASK_VZEROUPPER;
4253 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4254 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4255 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4256 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4257 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4258 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4259 /* Enable 128-bit AVX instruction generation
4260 for the auto-vectorizer. */
4261 if (TARGET_AVX128_OPTIMAL
4262 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4263 opts->x_target_flags |= MASK_PREFER_AVX128;
4265 if (opts->x_ix86_recip_name)
4267 char *p = ASTRDUP (opts->x_ix86_recip_name);
4268 char *q;
4269 unsigned int mask, i;
4270 bool invert;
4272 while ((q = strtok (p, ",")) != NULL)
4274 p = NULL;
4275 if (*q == '!')
4277 invert = true;
4278 q++;
4280 else
4281 invert = false;
4283 if (!strcmp (q, "default"))
4284 mask = RECIP_MASK_ALL;
4285 else
4287 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4288 if (!strcmp (q, recip_options[i].string))
4290 mask = recip_options[i].mask;
4291 break;
4294 if (i == ARRAY_SIZE (recip_options))
4296 error ("unknown option for -mrecip=%s", q);
4297 invert = false;
4298 mask = RECIP_MASK_NONE;
4302 opts->x_recip_mask_explicit |= mask;
4303 if (invert)
4304 opts->x_recip_mask &= ~mask;
4305 else
4306 opts->x_recip_mask |= mask;
4310 if (TARGET_RECIP_P (opts->x_target_flags))
4311 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4312 else if (opts_set->x_target_flags & MASK_RECIP)
4313 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
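/* Usage sketch (option string illustrative): -mrecip=all,!sqrt first ORs
   RECIP_MASK_ALL into x_recip_mask, then the '!'-prefixed "sqrt" token
   clears RECIP_MASK_SQRT; both masks are accumulated into
   x_recip_mask_explicit, so the TARGET_RECIP handling above does not
   re-enable (or re-disable) anything the user spelled out explicitly.  */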
4315 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4316 for 64-bit Bionic. */
4317 if (TARGET_HAS_BIONIC
4318 && !(opts_set->x_target_flags
4319 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4320 opts->x_target_flags |= (TARGET_64BIT
4321 ? MASK_LONG_DOUBLE_128
4322 : MASK_LONG_DOUBLE_64);
4324 /* Only one of them can be active. */
4325 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4326 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4328 /* Save the initial options in case the user does function specific
4329 options. */
4330 if (main_args_p)
4331 target_option_default_node = target_option_current_node
4332 = build_target_option_node (opts);
4334 /* Handle stack protector */
4335 if (!opts_set->x_ix86_stack_protector_guard)
4336 opts->x_ix86_stack_protector_guard
4337 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4339 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4340 if (opts->x_ix86_tune_memcpy_strategy)
4342 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4343 ix86_parse_stringop_strategy_string (str, false);
4344 free (str);
4347 if (opts->x_ix86_tune_memset_strategy)
4349 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4350 ix86_parse_stringop_strategy_string (str, true);
4351 free (str);
4355 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4357 static void
4358 ix86_option_override (void)
4360 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4361 struct register_pass_info insert_vzeroupper_info
4362 = { pass_insert_vzeroupper, "reload",
4363 1, PASS_POS_INSERT_AFTER
4366 ix86_option_override_internal (true, &global_options, &global_options_set);
4369 /* This needs to be done at start up. It's convenient to do it here. */
4370 register_pass (&insert_vzeroupper_info);
4373 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4374 static char *
4375 ix86_offload_options (void)
4377 if (TARGET_LP64)
4378 return xstrdup ("-foffload-abi=lp64");
4379 return xstrdup ("-foffload-abi=ilp32");
4382 /* Update register usage after having seen the compiler flags. */
4384 static void
4385 ix86_conditional_register_usage (void)
4387 int i, c_mask;
4389 /* For 32-bit targets, squash the REX registers. */
4390 if (! TARGET_64BIT)
4392 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4393 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4394 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4395 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4396 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4397 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4400 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4401 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4402 : TARGET_64BIT ? (1 << 2)
4403 : (1 << 1));
4405 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4407 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4409 /* Set/reset conditionally defined registers from
4410 CALL_USED_REGISTERS initializer. */
4411 if (call_used_regs[i] > 1)
4412 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4414 /* Calculate registers of CLOBBERED_REGS register set
4415 as call used registers from GENERAL_REGS register set. */
4416 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4417 && call_used_regs[i])
4418 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4421 /* If MMX is disabled, squash the registers. */
4422 if (! TARGET_MMX)
4423 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4424 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4425 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4427 /* If SSE is disabled, squash the registers. */
4428 if (! TARGET_SSE)
4429 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4430 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4431 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4433 /* If the FPU is disabled, squash the registers. */
4434 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4435 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4436 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4437 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4439 /* If AVX512F is disabled, squash the registers. */
4440 if (! TARGET_AVX512F)
4442 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4443 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4445 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4446 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4449 /* If MPX is disabled, squash the registers. */
4450 if (! TARGET_MPX)
4451 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4452 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4456 /* Save the current options */
4458 static void
4459 ix86_function_specific_save (struct cl_target_option *ptr,
4460 struct gcc_options *opts)
4462 ptr->arch = ix86_arch;
4463 ptr->schedule = ix86_schedule;
4464 ptr->prefetch_sse = x86_prefetch_sse;
4465 ptr->tune = ix86_tune;
4466 ptr->branch_cost = ix86_branch_cost;
4467 ptr->tune_defaulted = ix86_tune_defaulted;
4468 ptr->arch_specified = ix86_arch_specified;
4469 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4470 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4471 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4472 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4473 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4474 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4475 ptr->x_ix86_abi = opts->x_ix86_abi;
4476 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4477 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4478 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4479 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4480 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4481 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4482 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4483 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4484 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4485 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4486 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4487 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4488 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4489 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4490 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4491 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4492 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4493 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4494 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4495 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4497 /* The fields are char but the variables are not; make sure the
4498 values fit in the fields. */
4499 gcc_assert (ptr->arch == ix86_arch);
4500 gcc_assert (ptr->schedule == ix86_schedule);
4501 gcc_assert (ptr->tune == ix86_tune);
4502 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4505 /* Restore the current options */
4507 static void
4508 ix86_function_specific_restore (struct gcc_options *opts,
4509 struct cl_target_option *ptr)
4511 enum processor_type old_tune = ix86_tune;
4512 enum processor_type old_arch = ix86_arch;
4513 unsigned int ix86_arch_mask;
4514 int i;
4516 /* We don't change -fPIC. */
4517 opts->x_flag_pic = flag_pic;
4519 ix86_arch = (enum processor_type) ptr->arch;
4520 ix86_schedule = (enum attr_cpu) ptr->schedule;
4521 ix86_tune = (enum processor_type) ptr->tune;
4522 x86_prefetch_sse = ptr->prefetch_sse;
4523 opts->x_ix86_branch_cost = ptr->branch_cost;
4524 ix86_tune_defaulted = ptr->tune_defaulted;
4525 ix86_arch_specified = ptr->arch_specified;
4526 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4527 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4528 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4529 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4530 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4531 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4532 opts->x_ix86_abi = ptr->x_ix86_abi;
4533 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4534 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4535 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4536 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4537 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4538 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4539 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4540 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4541 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4542 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4543 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4544 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4545 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4546 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4547 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4548 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4549 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4550 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4551 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4552 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4553 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4554 /* TODO: ix86_cost should be chosen at instruction or function granularity
4555 so that for cold code we use size_cost even in !optimize_size compilation. */
4556 if (opts->x_optimize_size)
4557 ix86_cost = &ix86_size_cost;
4558 else
4559 ix86_cost = ix86_tune_cost;
4561 /* Recreate the arch feature tests if the arch changed */
4562 if (old_arch != ix86_arch)
4564 ix86_arch_mask = 1u << ix86_arch;
4565 for (i = 0; i < X86_ARCH_LAST; ++i)
4566 ix86_arch_features[i]
4567 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4570 /* Recreate the tune optimization tests */
4571 if (old_tune != ix86_tune)
4572 set_ix86_tune_features (ix86_tune, false);
4575 /* Adjust target options after streaming them in. This is mainly about
4576 reconciling them with global options. */
4578 static void
4579 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
4581 /* flag_pic is a global option, but ix86_cmodel is a target-saved option
4582 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
4583 for PIC, or error out. */
4584 if (flag_pic)
4585 switch (ptr->x_ix86_cmodel)
4587 case CM_SMALL:
4588 ptr->x_ix86_cmodel = CM_SMALL_PIC;
4589 break;
4591 case CM_MEDIUM:
4592 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
4593 break;
4595 case CM_LARGE:
4596 ptr->x_ix86_cmodel = CM_LARGE_PIC;
4597 break;
4599 case CM_KERNEL:
4600 error ("code model %s does not support PIC mode", "kernel");
4601 break;
4603 default:
4604 break;
4606 else
4607 switch (ptr->x_ix86_cmodel)
4609 case CM_SMALL_PIC:
4610 ptr->x_ix86_cmodel = CM_SMALL;
4611 break;
4613 case CM_MEDIUM_PIC:
4614 ptr->x_ix86_cmodel = CM_MEDIUM;
4615 break;
4617 case CM_LARGE_PIC:
4618 ptr->x_ix86_cmodel = CM_LARGE;
4619 break;
4621 default:
4622 break;
4626 /* Print the current options */
4628 static void
4629 ix86_function_specific_print (FILE *file, int indent,
4630 struct cl_target_option *ptr)
4632 char *target_string
4633 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4634 NULL, NULL, ptr->x_ix86_fpmath, false);
4636 gcc_assert (ptr->arch < PROCESSOR_max);
4637 fprintf (file, "%*sarch = %d (%s)\n",
4638 indent, "",
4639 ptr->arch, processor_target_table[ptr->arch].name);
4641 gcc_assert (ptr->tune < PROCESSOR_max);
4642 fprintf (file, "%*stune = %d (%s)\n",
4643 indent, "",
4644 ptr->tune, processor_target_table[ptr->tune].name);
4646 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4648 if (target_string)
4650 fprintf (file, "%*s%s\n", indent, "", target_string);
4651 free (target_string);
4656 /* Inner function to process the attribute((target(...))), take an argument and
4657 set the current options from the argument. If we have a list, recursively go
4658 over the list. */
4660 static bool
4661 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4662 struct gcc_options *opts,
4663 struct gcc_options *opts_set,
4664 struct gcc_options *enum_opts_set)
4666 char *next_optstr;
4667 bool ret = true;
4669 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4670 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4671 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4672 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4673 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4675 enum ix86_opt_type
4677 ix86_opt_unknown,
4678 ix86_opt_yes,
4679 ix86_opt_no,
4680 ix86_opt_str,
4681 ix86_opt_enum,
4682 ix86_opt_isa
4685 static const struct
4687 const char *string;
4688 size_t len;
4689 enum ix86_opt_type type;
4690 int opt;
4691 int mask;
4692 } attrs[] = {
4693 /* isa options */
4694 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4695 IX86_ATTR_ISA ("abm", OPT_mabm),
4696 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4697 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4698 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4699 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4700 IX86_ATTR_ISA ("aes", OPT_maes),
4701 IX86_ATTR_ISA ("sha", OPT_msha),
4702 IX86_ATTR_ISA ("avx", OPT_mavx),
4703 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4704 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4705 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4706 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4707 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4708 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4709 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4710 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4711 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4712 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4713 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4714 IX86_ATTR_ISA ("sse", OPT_msse),
4715 IX86_ATTR_ISA ("sse2", OPT_msse2),
4716 IX86_ATTR_ISA ("sse3", OPT_msse3),
4717 IX86_ATTR_ISA ("sse4", OPT_msse4),
4718 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4719 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4720 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4721 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4722 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4723 IX86_ATTR_ISA ("fma", OPT_mfma),
4724 IX86_ATTR_ISA ("xop", OPT_mxop),
4725 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4726 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4727 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4728 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4729 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4730 IX86_ATTR_ISA ("hle", OPT_mhle),
4731 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4732 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4733 IX86_ATTR_ISA ("adx", OPT_madx),
4734 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4735 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4736 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4737 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4738 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4739 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4740 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4741 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4742 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4743 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4744 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
4746 /* enum options */
4747 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4749 /* string options */
4750 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4751 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4753 /* flag options */
4754 IX86_ATTR_YES ("cld",
4755 OPT_mcld,
4756 MASK_CLD),
4758 IX86_ATTR_NO ("fancy-math-387",
4759 OPT_mfancy_math_387,
4760 MASK_NO_FANCY_MATH_387),
4762 IX86_ATTR_YES ("ieee-fp",
4763 OPT_mieee_fp,
4764 MASK_IEEE_FP),
4766 IX86_ATTR_YES ("inline-all-stringops",
4767 OPT_minline_all_stringops,
4768 MASK_INLINE_ALL_STRINGOPS),
4770 IX86_ATTR_YES ("inline-stringops-dynamically",
4771 OPT_minline_stringops_dynamically,
4772 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4774 IX86_ATTR_NO ("align-stringops",
4775 OPT_mno_align_stringops,
4776 MASK_NO_ALIGN_STRINGOPS),
4778 IX86_ATTR_YES ("recip",
4779 OPT_mrecip,
4780 MASK_RECIP),
4784 /* If this is a list, recurse to get the options. */
4785 if (TREE_CODE (args) == TREE_LIST)
4787 bool ret = true;
4789 for (; args; args = TREE_CHAIN (args))
4790 if (TREE_VALUE (args)
4791 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4792 p_strings, opts, opts_set,
4793 enum_opts_set))
4794 ret = false;
4796 return ret;
4799 else if (TREE_CODE (args) != STRING_CST)
4801 error ("attribute %<target%> argument not a string");
4802 return false;
4805 /* Handle multiple arguments separated by commas. */
4806 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4808 while (next_optstr && *next_optstr != '\0')
4810 char *p = next_optstr;
4811 char *orig_p = p;
4812 char *comma = strchr (next_optstr, ',');
4813 const char *opt_string;
4814 size_t len, opt_len;
4815 int opt;
4816 bool opt_set_p;
4817 char ch;
4818 unsigned i;
4819 enum ix86_opt_type type = ix86_opt_unknown;
4820 int mask = 0;
4822 if (comma)
4824 *comma = '\0';
4825 len = comma - next_optstr;
4826 next_optstr = comma + 1;
4828 else
4830 len = strlen (p);
4831 next_optstr = NULL;
4834 /* Recognize no-xxx. */
4835 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4837 opt_set_p = false;
4838 p += 3;
4839 len -= 3;
4841 else
4842 opt_set_p = true;
4844 /* Find the option. */
4845 ch = *p;
4846 opt = N_OPTS;
4847 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4849 type = attrs[i].type;
4850 opt_len = attrs[i].len;
4851 if (ch == attrs[i].string[0]
4852 && ((type != ix86_opt_str && type != ix86_opt_enum)
4853 ? len == opt_len
4854 : len > opt_len)
4855 && memcmp (p, attrs[i].string, opt_len) == 0)
4857 opt = attrs[i].opt;
4858 mask = attrs[i].mask;
4859 opt_string = attrs[i].string;
4860 break;
4864 /* Process the option. */
4865 if (opt == N_OPTS)
4867 error ("attribute(target(\"%s\")) is unknown", orig_p);
4868 ret = false;
4871 else if (type == ix86_opt_isa)
4873 struct cl_decoded_option decoded;
4875 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4876 ix86_handle_option (opts, opts_set,
4877 &decoded, input_location);
4880 else if (type == ix86_opt_yes || type == ix86_opt_no)
4882 if (type == ix86_opt_no)
4883 opt_set_p = !opt_set_p;
4885 if (opt_set_p)
4886 opts->x_target_flags |= mask;
4887 else
4888 opts->x_target_flags &= ~mask;
4891 else if (type == ix86_opt_str)
4893 if (p_strings[opt])
4895 error ("option(\"%s\") was already specified", opt_string);
4896 ret = false;
4898 else
4899 p_strings[opt] = xstrdup (p + opt_len);
4902 else if (type == ix86_opt_enum)
4904 bool arg_ok;
4905 int value;
4907 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4908 if (arg_ok)
4909 set_option (opts, enum_opts_set, opt, value,
4910 p + opt_len, DK_UNSPECIFIED, input_location,
4911 global_dc);
4912 else
4914 error ("attribute(target(\"%s\")) is unknown", orig_p);
4915 ret = false;
4919 else
4920 gcc_unreachable ();
4923 return ret;
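/* For illustration, a hypothetical declaration such as

     int foo (void) __attribute__ ((target ("avx2,no-sse4a,arch=haswell,fpmath=sse")));

   is handled by the routine above as follows: "avx2" and "no-sse4a" are
   isa options passed to ix86_handle_option (the "no-" prefix clearing
   opt_set_p), "arch=haswell" is a string option stored in p_strings, and
   "fpmath=sse" is an enum option recorded through set_option.  */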
4926 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4928 tree
4929 ix86_valid_target_attribute_tree (tree args,
4930 struct gcc_options *opts,
4931 struct gcc_options *opts_set)
4933 const char *orig_arch_string = opts->x_ix86_arch_string;
4934 const char *orig_tune_string = opts->x_ix86_tune_string;
4935 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4936 int orig_tune_defaulted = ix86_tune_defaulted;
4937 int orig_arch_specified = ix86_arch_specified;
4938 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4939 tree t = NULL_TREE;
4940 int i;
4941 struct cl_target_option *def
4942 = TREE_TARGET_OPTION (target_option_default_node);
4943 struct gcc_options enum_opts_set;
4945 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4947 /* Process each of the options on the chain. */
4948 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4949 opts_set, &enum_opts_set))
4950 return error_mark_node;
4952 /* If the changed options are different from the default, rerun
4953 ix86_option_override_internal, and then save the options away.
4954 The string options are attribute options, and will be undone
4955 when we copy the save structure. */
4956 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4957 || opts->x_target_flags != def->x_target_flags
4958 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4959 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4960 || enum_opts_set.x_ix86_fpmath)
4962 /* If we are using the default tune= or arch=, undo the string assigned,
4963 and use the default. */
4964 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4965 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4966 else if (!orig_arch_specified)
4967 opts->x_ix86_arch_string = NULL;
4969 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4970 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4971 else if (orig_tune_defaulted)
4972 opts->x_ix86_tune_string = NULL;
4974 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4975 if (enum_opts_set.x_ix86_fpmath)
4976 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4977 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4978 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4980 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4981 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4984 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4985 ix86_option_override_internal (false, opts, opts_set);
4987 /* Add any builtin functions with the new isa if any. */
4988 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4990 /* Save the current options unless we are validating options for
4991 #pragma. */
4992 t = build_target_option_node (opts);
4994 opts->x_ix86_arch_string = orig_arch_string;
4995 opts->x_ix86_tune_string = orig_tune_string;
4996 opts_set->x_ix86_fpmath = orig_fpmath_set;
4998 /* Free up memory allocated to hold the strings */
4999 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5000 free (option_strings[i]);
5003 return t;
5006 /* Hook to validate attribute((target("string"))). */
5008 static bool
5009 ix86_valid_target_attribute_p (tree fndecl,
5010 tree ARG_UNUSED (name),
5011 tree args,
5012 int ARG_UNUSED (flags))
5014 struct gcc_options func_options;
5015 tree new_target, new_optimize;
5016 bool ret = true;
5018 /* attribute((target("default"))) does nothing, beyond
5019 affecting multi-versioning. */
5020 if (TREE_VALUE (args)
5021 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5022 && TREE_CHAIN (args) == NULL_TREE
5023 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5024 return true;
5026 tree old_optimize = build_optimization_node (&global_options);
5028 /* Get the optimization options of the current function. */
5029 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5031 if (!func_optimize)
5032 func_optimize = old_optimize;
5034 /* Init func_options. */
5035 memset (&func_options, 0, sizeof (func_options));
5036 init_options_struct (&func_options, NULL);
5037 lang_hooks.init_options_struct (&func_options);
5039 cl_optimization_restore (&func_options,
5040 TREE_OPTIMIZATION (func_optimize));
5042 /* Initialize func_options to the default before its target options can
5043 be set. */
5044 cl_target_option_restore (&func_options,
5045 TREE_TARGET_OPTION (target_option_default_node));
5047 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5048 &global_options_set);
5050 new_optimize = build_optimization_node (&func_options);
5052 if (new_target == error_mark_node)
5053 ret = false;
5055 else if (fndecl && new_target)
5057 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5059 if (old_optimize != new_optimize)
5060 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5063 return ret;
5067 /* Hook to determine if one function can safely inline another. */
5069 static bool
5070 ix86_can_inline_p (tree caller, tree callee)
5072 bool ret = false;
5073 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5074 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5076 /* If callee has no option attributes, then it is ok to inline. */
5077 if (!callee_tree)
5078 ret = true;
5080 /* If caller has no option attributes, but callee does then it is not ok to
5081 inline. */
5082 else if (!caller_tree)
5083 ret = false;
5085 else
5087 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5088 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5090 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
5091 can inline an SSE2 function but an SSE2 function can't inline an SSE4
5092 function. */
5093 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5094 != callee_opts->x_ix86_isa_flags)
5095 ret = false;
5097 /* See if we have the same non-isa options. */
5098 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5099 ret = false;
5101 /* See if arch, tune, etc. are the same. */
5102 else if (caller_opts->arch != callee_opts->arch)
5103 ret = false;
5105 else if (caller_opts->tune != callee_opts->tune)
5106 ret = false;
5108 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5109 ret = false;
5111 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5112 ret = false;
5114 else
5115 ret = true;
5118 return ret;
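/* As a hypothetical example of the subset test above,

     __attribute__ ((target ("avx2")))   void caller (void);
     __attribute__ ((target ("sse4.2"))) void callee (void);

   callee may be inlined into caller, because the callee's isa flags are a
   subset of the caller's, whereas inlining caller into callee would be
   rejected.  */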
5122 /* Remember the last target of ix86_set_current_function. */
5123 static GTY(()) tree ix86_previous_fndecl;
5125 /* Set targets globals to the default (or current #pragma GCC target
5126 if active). Invalidate ix86_previous_fndecl cache. */
5128 void
5129 ix86_reset_previous_fndecl (void)
5131 tree new_tree = target_option_current_node;
5132 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5133 if (TREE_TARGET_GLOBALS (new_tree))
5134 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5135 else if (new_tree == target_option_default_node)
5136 restore_target_globals (&default_target_globals);
5137 else
5138 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5139 ix86_previous_fndecl = NULL_TREE;
5142 /* Establish appropriate back-end context for processing the function
5143 FNDECL. The argument might be NULL to indicate processing at top
5144 level, outside of any function scope. */
5145 static void
5146 ix86_set_current_function (tree fndecl)
5148 /* Only change the context if the function changes. This hook is called
5149 several times in the course of compiling a function, and we don't want to
5150 slow things down too much or call target_reinit when it isn't safe. */
5151 if (fndecl == ix86_previous_fndecl)
5152 return;
5154 tree old_tree;
5155 if (ix86_previous_fndecl == NULL_TREE)
5156 old_tree = target_option_current_node;
5157 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5158 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5159 else
5160 old_tree = target_option_default_node;
5162 if (fndecl == NULL_TREE)
5164 if (old_tree != target_option_current_node)
5165 ix86_reset_previous_fndecl ();
5166 return;
5169 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5170 if (new_tree == NULL_TREE)
5171 new_tree = target_option_default_node;
5173 if (old_tree != new_tree)
5175 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5176 if (TREE_TARGET_GLOBALS (new_tree))
5177 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5178 else if (new_tree == target_option_default_node)
5179 restore_target_globals (&default_target_globals);
5180 else
5181 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5183 ix86_previous_fndecl = fndecl;
5187 /* Return true if this goes in large data/bss. */
5189 static bool
5190 ix86_in_large_data_p (tree exp)
5192 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5193 return false;
5195 /* Functions are never large data. */
5196 if (TREE_CODE (exp) == FUNCTION_DECL)
5197 return false;
5199 /* Automatic variables are never large data. */
5200 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5201 return false;
5203 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5205 const char *section = DECL_SECTION_NAME (exp);
5206 if (strcmp (section, ".ldata") == 0
5207 || strcmp (section, ".lbss") == 0)
5208 return true;
5209 return false;
5211 else
5213 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5215 /* If this is an incomplete type with size 0, then we can't put it
5216 in data because it might be too big when completed. Also,
5217 int_size_in_bytes returns -1 if the size can vary or is larger than
5218 an integer, in which case it is also safer to assume that it goes in
5219 large data. */
5220 if (size <= 0 || size > ix86_section_threshold)
5221 return true;
5224 return false;
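/* Illustrative example (assuming the default -mlarge-data-threshold):
   with -mcmodel=medium, a file-scope object such as

     static char big_buffer[1 << 20];

   exceeds the section threshold and is therefore considered large data,
   ending up in .lbss (or .ldata if initialized), while small globals stay
   in the ordinary .data/.bss sections.  */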
5227 /* Switch to the appropriate section for output of DECL.
5228 DECL is either a `VAR_DECL' node or a constant of some sort.
5229 RELOC indicates whether forming the initial value of DECL requires
5230 link-time relocations. */
5232 ATTRIBUTE_UNUSED static section *
5233 x86_64_elf_select_section (tree decl, int reloc,
5234 unsigned HOST_WIDE_INT align)
5236 if (ix86_in_large_data_p (decl))
5238 const char *sname = NULL;
5239 unsigned int flags = SECTION_WRITE;
5240 switch (categorize_decl_for_section (decl, reloc))
5242 case SECCAT_DATA:
5243 sname = ".ldata";
5244 break;
5245 case SECCAT_DATA_REL:
5246 sname = ".ldata.rel";
5247 break;
5248 case SECCAT_DATA_REL_LOCAL:
5249 sname = ".ldata.rel.local";
5250 break;
5251 case SECCAT_DATA_REL_RO:
5252 sname = ".ldata.rel.ro";
5253 break;
5254 case SECCAT_DATA_REL_RO_LOCAL:
5255 sname = ".ldata.rel.ro.local";
5256 break;
5257 case SECCAT_BSS:
5258 sname = ".lbss";
5259 flags |= SECTION_BSS;
5260 break;
5261 case SECCAT_RODATA:
5262 case SECCAT_RODATA_MERGE_STR:
5263 case SECCAT_RODATA_MERGE_STR_INIT:
5264 case SECCAT_RODATA_MERGE_CONST:
5265 sname = ".lrodata";
5266 flags = 0;
5267 break;
5268 case SECCAT_SRODATA:
5269 case SECCAT_SDATA:
5270 case SECCAT_SBSS:
5271 gcc_unreachable ();
5272 case SECCAT_TEXT:
5273 case SECCAT_TDATA:
5274 case SECCAT_TBSS:
5275 /* We don't split these for the medium model. Place them into
5276 default sections and hope for the best. */
5277 break;
5279 if (sname)
5281 /* We might get called with string constants, but get_named_section
5282 doesn't like them as they are not DECLs. Also, we need to set
5283 flags in that case. */
5284 if (!DECL_P (decl))
5285 return get_section (sname, flags, NULL);
5286 return get_named_section (decl, sname, reloc);
5289 return default_elf_select_section (decl, reloc, align);
5292 /* Select a set of attributes for section NAME based on the properties
5293 of DECL and whether or not RELOC indicates that DECL's initializer
5294 might contain runtime relocations. */
5296 static unsigned int ATTRIBUTE_UNUSED
5297 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5299 unsigned int flags = default_section_type_flags (decl, name, reloc);
5301 if (decl == NULL_TREE
5302 && (strcmp (name, ".ldata.rel.ro") == 0
5303 || strcmp (name, ".ldata.rel.ro.local") == 0))
5304 flags |= SECTION_RELRO;
5306 if (strcmp (name, ".lbss") == 0
5307 || strncmp (name, ".lbss.", 5) == 0
5308 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5309 flags |= SECTION_BSS;
5311 return flags;
5314 /* Build up a unique section name, expressed as a
5315 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5316 RELOC indicates whether the initial value of EXP requires
5317 link-time relocations. */
5319 static void ATTRIBUTE_UNUSED
5320 x86_64_elf_unique_section (tree decl, int reloc)
5322 if (ix86_in_large_data_p (decl))
5324 const char *prefix = NULL;
5325 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5326 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5328 switch (categorize_decl_for_section (decl, reloc))
5330 case SECCAT_DATA:
5331 case SECCAT_DATA_REL:
5332 case SECCAT_DATA_REL_LOCAL:
5333 case SECCAT_DATA_REL_RO:
5334 case SECCAT_DATA_REL_RO_LOCAL:
5335 prefix = one_only ? ".ld" : ".ldata";
5336 break;
5337 case SECCAT_BSS:
5338 prefix = one_only ? ".lb" : ".lbss";
5339 break;
5340 case SECCAT_RODATA:
5341 case SECCAT_RODATA_MERGE_STR:
5342 case SECCAT_RODATA_MERGE_STR_INIT:
5343 case SECCAT_RODATA_MERGE_CONST:
5344 prefix = one_only ? ".lr" : ".lrodata";
5345 break;
5346 case SECCAT_SRODATA:
5347 case SECCAT_SDATA:
5348 case SECCAT_SBSS:
5349 gcc_unreachable ();
5350 case SECCAT_TEXT:
5351 case SECCAT_TDATA:
5352 case SECCAT_TBSS:
5353 /* We don't split these for the medium model. Place them into
5354 default sections and hope for the best. */
5355 break;
5357 if (prefix)
5359 const char *name, *linkonce;
5360 char *string;
5362 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5363 name = targetm.strip_name_encoding (name);
5365 /* If we're using one_only, then there needs to be a .gnu.linkonce
5366 prefix to the section name. */
5367 linkonce = one_only ? ".gnu.linkonce" : "";
5369 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5371 set_decl_section_name (decl, string);
5372 return;
5375 default_unique_section (decl, reloc);
5378 #ifdef COMMON_ASM_OP
5379 /* This says how to output assembler code to declare an
5380 uninitialized external linkage data object.
5382 For medium-model x86-64 we need to use the .largecomm directive for
5383 large objects. */
5384 void
5385 x86_elf_aligned_common (FILE *file,
5386 const char *name, unsigned HOST_WIDE_INT size,
5387 int align)
5389 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5390 && size > (unsigned int)ix86_section_threshold)
5391 fputs ("\t.largecomm\t", file);
5392 else
5393 fputs (COMMON_ASM_OP, file);
5394 assemble_name (file, name);
5395 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5396 size, align / BITS_PER_UNIT);
5398 #endif
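/* For illustration, a common object above the section threshold under the
   medium code model is announced by the routine above with a directive of
   the form

     .largecomm NAME,SIZE,ALIGN

   (ALIGN in bytes), while smaller objects fall back to the usual directive
   given by COMMON_ASM_OP.  */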
5400 /* Utility function for targets to use in implementing
5401 ASM_OUTPUT_ALIGNED_BSS. */
5403 void
5404 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5405 unsigned HOST_WIDE_INT size, int align)
5407 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5408 && size > (unsigned int)ix86_section_threshold)
5409 switch_to_section (get_named_section (decl, ".lbss", 0));
5410 else
5411 switch_to_section (bss_section);
5412 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5413 #ifdef ASM_DECLARE_OBJECT_NAME
5414 last_assemble_variable_decl = decl;
5415 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5416 #else
5417 /* The standard thing is just to output a label for the object. */
5418 ASM_OUTPUT_LABEL (file, name);
5419 #endif /* ASM_DECLARE_OBJECT_NAME */
5420 ASM_OUTPUT_SKIP (file, size ? size : 1);
5423 /* Decide whether we must probe the stack before any space allocation
5424 on this target. It's essentially TARGET_STACK_PROBE except when
5425 -fstack-check causes the stack to be already probed differently. */
5427 bool
5428 ix86_target_stack_probe (void)
5430 /* Do not probe the stack twice if static stack checking is enabled. */
5431 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5432 return false;
5434 return TARGET_STACK_PROBE;
5437 /* Decide whether we can make a sibling call to a function. DECL is the
5438 declaration of the function being targeted by the call and EXP is the
5439 CALL_EXPR representing the call. */
5441 static bool
5442 ix86_function_ok_for_sibcall (tree decl, tree exp)
5444 tree type, decl_or_type;
5445 rtx a, b;
5447 /* If we are generating position-independent code, we cannot sibcall
5448 optimize any indirect call, or a direct call to a global function,
5449 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5450 if (!TARGET_MACHO
5451 && !TARGET_64BIT
5452 && flag_pic
5453 && (!decl || !targetm.binds_local_p (decl)))
5454 return false;
5456 /* If we need to align the outgoing stack, then sibcalling would
5457 unalign the stack, which may break the called function. */
5458 if (ix86_minimum_incoming_stack_boundary (true)
5459 < PREFERRED_STACK_BOUNDARY)
5460 return false;
5462 if (decl)
5464 decl_or_type = decl;
5465 type = TREE_TYPE (decl);
5467 else
5469 /* We're looking at the CALL_EXPR, we need the type of the function. */
5470 type = CALL_EXPR_FN (exp); /* pointer expression */
5471 type = TREE_TYPE (type); /* pointer type */
5472 type = TREE_TYPE (type); /* function type */
5473 decl_or_type = type;
5476 /* Check that the return value locations are the same. For example,
5477 if we are returning floats on the 80387 register stack, we cannot
5478 make a sibcall from a function that doesn't return a float to a
5479 function that does or, conversely, from a function that does return
5480 a float to a function that doesn't; the necessary stack adjustment
5481 would not be executed. This is also the place we notice
5482 differences in the return value ABI. Note that it is ok for one
5483 of the functions to have void return type as long as the return
5484 value of the other is passed in a register. */
5485 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5486 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5487 cfun->decl, false);
5488 if (STACK_REG_P (a) || STACK_REG_P (b))
5490 if (!rtx_equal_p (a, b))
5491 return false;
5493 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5495 else if (!rtx_equal_p (a, b))
5496 return false;
5498 if (TARGET_64BIT)
5500 /* The SYSV ABI has more call-clobbered registers;
5501 disallow sibcalls from MS to SYSV. */
5502 if (cfun->machine->call_abi == MS_ABI
5503 && ix86_function_type_abi (type) == SYSV_ABI)
5504 return false;
5506 else
5508 /* If this call is indirect, we'll need to be able to use a
5509 call-clobbered register for the address of the target function.
5510 Make sure that all such registers are not used for passing
5511 parameters. Note that DLLIMPORT functions are indirect. */
5512 if (!decl
5513 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5515 if (ix86_function_regparm (type, NULL) >= 3)
5517 /* ??? Need to count the actual number of registers to be used,
5518 not the possible number of registers. Fix later. */
5519 return false;
5524 /* Otherwise okay. That also includes certain types of indirect calls. */
5525 return true;
5528 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5529 and "sseregparm" calling convention attributes;
5530 arguments as in struct attribute_spec.handler. */
5532 static tree
5533 ix86_handle_cconv_attribute (tree *node, tree name,
5534 tree args,
5535 int,
5536 bool *no_add_attrs)
5538 if (TREE_CODE (*node) != FUNCTION_TYPE
5539 && TREE_CODE (*node) != METHOD_TYPE
5540 && TREE_CODE (*node) != FIELD_DECL
5541 && TREE_CODE (*node) != TYPE_DECL)
5543 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5544 name);
5545 *no_add_attrs = true;
5546 return NULL_TREE;
5549 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5550 if (is_attribute_p ("regparm", name))
5552 tree cst;
5554 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5556 error ("fastcall and regparm attributes are not compatible");
5559 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5561 error ("regparam and thiscall attributes are not compatible");
5564 cst = TREE_VALUE (args);
5565 if (TREE_CODE (cst) != INTEGER_CST)
5567 warning (OPT_Wattributes,
5568 "%qE attribute requires an integer constant argument",
5569 name);
5570 *no_add_attrs = true;
5572 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5574 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5575 name, REGPARM_MAX);
5576 *no_add_attrs = true;
5579 return NULL_TREE;
5582 if (TARGET_64BIT)
5584 /* Do not warn when emulating the MS ABI. */
5585 if ((TREE_CODE (*node) != FUNCTION_TYPE
5586 && TREE_CODE (*node) != METHOD_TYPE)
5587 || ix86_function_type_abi (*node) != MS_ABI)
5588 warning (OPT_Wattributes, "%qE attribute ignored",
5589 name);
5590 *no_add_attrs = true;
5591 return NULL_TREE;
5594 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5595 if (is_attribute_p ("fastcall", name))
5597 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5599 error ("fastcall and cdecl attributes are not compatible");
5601 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5603 error ("fastcall and stdcall attributes are not compatible");
5605 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5607 error ("fastcall and regparm attributes are not compatible");
5609 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5611 error ("fastcall and thiscall attributes are not compatible");
5615 /* Can combine stdcall with fastcall (redundant), regparm and
5616 sseregparm. */
5617 else if (is_attribute_p ("stdcall", name))
5619 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5621 error ("stdcall and cdecl attributes are not compatible");
5623 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5625 error ("stdcall and fastcall attributes are not compatible");
5627 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5629 error ("stdcall and thiscall attributes are not compatible");
5633 /* Can combine cdecl with regparm and sseregparm. */
5634 else if (is_attribute_p ("cdecl", name))
5636 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5638 error ("stdcall and cdecl attributes are not compatible");
5640 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5642 error ("fastcall and cdecl attributes are not compatible");
5644 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5646 error ("cdecl and thiscall attributes are not compatible");
5649 else if (is_attribute_p ("thiscall", name))
5651 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5652 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5653 name);
5654 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5656 error ("stdcall and thiscall attributes are not compatible");
5658 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5660 error ("fastcall and thiscall attributes are not compatible");
5662 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5664 error ("cdecl and thiscall attributes are not compatible");
5668 /* Can combine sseregparm with all attributes. */
5670 return NULL_TREE;
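/* Hypothetical examples of the combinations checked above:

     void f1 (int, int) __attribute__ ((fastcall, sseregparm));   accepted
     void f2 (int, int) __attribute__ ((stdcall, regparm (2)));   accepted
     void f3 (int, int) __attribute__ ((fastcall, regparm (2)));  rejected

   sseregparm combines with everything, while mixing fastcall with regparm,
   stdcall, cdecl or thiscall triggers the errors emitted above.  */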
5673 /* The transactional memory builtins are implicitly regparm or fastcall
5674 depending on the ABI. Override the generic do-nothing attribute that
5675 these builtins were declared with, and replace it with one of the two
5676 attributes that we expect elsewhere. */
5678 static tree
5679 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5680 int flags, bool *no_add_attrs)
5682 tree alt;
5684 /* In no case do we want to add the placeholder attribute. */
5685 *no_add_attrs = true;
5687 /* The 64-bit ABI is unchanged for transactional memory. */
5688 if (TARGET_64BIT)
5689 return NULL_TREE;
5691 /* ??? Is there a better way to validate 32-bit windows? We have
5692 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5693 if (CHECK_STACK_LIMIT > 0)
5694 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5695 else
5697 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5698 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5700 decl_attributes (node, alt, flags);
5702 return NULL_TREE;
5705 /* This function determines from TYPE the calling-convention. */
5707 unsigned int
5708 ix86_get_callcvt (const_tree type)
5710 unsigned int ret = 0;
5711 bool is_stdarg;
5712 tree attrs;
5714 if (TARGET_64BIT)
5715 return IX86_CALLCVT_CDECL;
5717 attrs = TYPE_ATTRIBUTES (type);
5718 if (attrs != NULL_TREE)
5720 if (lookup_attribute ("cdecl", attrs))
5721 ret |= IX86_CALLCVT_CDECL;
5722 else if (lookup_attribute ("stdcall", attrs))
5723 ret |= IX86_CALLCVT_STDCALL;
5724 else if (lookup_attribute ("fastcall", attrs))
5725 ret |= IX86_CALLCVT_FASTCALL;
5726 else if (lookup_attribute ("thiscall", attrs))
5727 ret |= IX86_CALLCVT_THISCALL;
5729 /* Regparm isn't allowed for thiscall and fastcall. */
5730 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5732 if (lookup_attribute ("regparm", attrs))
5733 ret |= IX86_CALLCVT_REGPARM;
5734 if (lookup_attribute ("sseregparm", attrs))
5735 ret |= IX86_CALLCVT_SSEREGPARM;
5738 if (IX86_BASE_CALLCVT(ret) != 0)
5739 return ret;
5742 is_stdarg = stdarg_p (type);
5743 if (TARGET_RTD && !is_stdarg)
5744 return IX86_CALLCVT_STDCALL | ret;
5746 if (ret != 0
5747 || is_stdarg
5748 || TREE_CODE (type) != METHOD_TYPE
5749 || ix86_function_type_abi (type) != MS_ABI)
5750 return IX86_CALLCVT_CDECL | ret;
5752 return IX86_CALLCVT_THISCALL;
5755 /* Return 0 if the attributes for two types are incompatible, 1 if they
5756 are compatible, and 2 if they are nearly compatible (which causes a
5757 warning to be generated). */
5759 static int
5760 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5762 unsigned int ccvt1, ccvt2;
5764 if (TREE_CODE (type1) != FUNCTION_TYPE
5765 && TREE_CODE (type1) != METHOD_TYPE)
5766 return 1;
5768 ccvt1 = ix86_get_callcvt (type1);
5769 ccvt2 = ix86_get_callcvt (type2);
5770 if (ccvt1 != ccvt2)
5771 return 0;
5772 if (ix86_function_regparm (type1, NULL)
5773 != ix86_function_regparm (type2, NULL))
5774 return 0;
5776 return 1;
5779 /* Return the regparm value for a function with the indicated TYPE and DECL.
5780 DECL may be NULL when calling function indirectly
5781 or considering a libcall. */
5783 static int
5784 ix86_function_regparm (const_tree type, const_tree decl)
5786 tree attr;
5787 int regparm;
5788 unsigned int ccvt;
5790 if (TARGET_64BIT)
5791 return (ix86_function_type_abi (type) == SYSV_ABI
5792 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5793 ccvt = ix86_get_callcvt (type);
5794 regparm = ix86_regparm;
5796 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5798 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5799 if (attr)
5801 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5802 return regparm;
5805 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5806 return 2;
5807 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5808 return 1;
5810 /* Use register calling convention for local functions when possible. */
5811 if (decl
5812 && TREE_CODE (decl) == FUNCTION_DECL)
5814 cgraph_node *target = cgraph_node::get (decl);
5815 if (target)
5816 target = target->function_symbol ();
5818 /* Caller and callee must agree on the calling convention, so
5819 checking just optimize here would mean that with
5820 __attribute__((optimize (...))) the caller could use the regparm convention
5821 and the callee not, or vice versa. Instead look at whether the callee
5822 is optimized or not. */
5823 if (target && opt_for_fn (target->decl, optimize)
5824 && !(profile_flag && !flag_fentry))
5826 cgraph_local_info *i = &target->local;
5827 if (i && i->local && i->can_change_signature)
5829 int local_regparm, globals = 0, regno;
5831 /* Make sure no regparm register is taken by a
5832 fixed register variable. */
5833 for (local_regparm = 0; local_regparm < REGPARM_MAX;
5834 local_regparm++)
5835 if (fixed_regs[local_regparm])
5836 break;
5838 /* We don't want to use regparm(3) for nested functions as
5839 these use a static chain pointer in the third argument. */
5840 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
5841 local_regparm = 2;
5843 /* Save a register for the split stack. */
5844 if (local_regparm == 3 && flag_split_stack)
5845 local_regparm = 2;
5847 /* Each fixed register usage increases register pressure,
5848 so fewer registers should be used for argument passing.
5849 This functionality can be overridden by an explicit
5850 regparm value. */
5851 for (regno = AX_REG; regno <= DI_REG; regno++)
5852 if (fixed_regs[regno])
5853 globals++;
5855 local_regparm
5856 = globals < local_regparm ? local_regparm - globals : 0;
5858 if (local_regparm > regparm)
5859 regparm = local_regparm;
5864 return regparm;
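/* Illustrative note: the loop above can promote a local, optimized
   function up to regparm(3).  If one of the candidate registers is made
   fixed, e.g. by compiling with -ffixed-ecx, it is counted in GLOBALS and
   the computed value shrinks, so only the remaining free registers are
   used for argument passing.  */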
5867 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5868 DFmode (2) arguments in SSE registers for a function with the
5869 indicated TYPE and DECL. DECL may be NULL when calling function
5870 indirectly or considering a libcall. Otherwise return 0. */
5872 static int
5873 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5875 gcc_assert (!TARGET_64BIT);
5877 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5878 by the sseregparm attribute. */
5879 if (TARGET_SSEREGPARM
5880 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5882 if (!TARGET_SSE)
5884 if (warn)
5886 if (decl)
5887 error ("calling %qD with attribute sseregparm without "
5888 "SSE/SSE2 enabled", decl);
5889 else
5890 error ("calling %qT with attribute sseregparm without "
5891 "SSE/SSE2 enabled", type);
5893 return 0;
5896 return 2;
5899 if (!decl)
5900 return 0;
5902 cgraph_node *target = cgraph_node::get (decl);
5903 if (target)
5904 target = target->function_symbol ();
5906 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5907 (and DFmode for SSE2) arguments in SSE registers. */
5908 if (target
5909 /* TARGET_SSE_MATH */
5910 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
5911 && opt_for_fn (target->decl, optimize)
5912 && !(profile_flag && !flag_fentry))
5914 cgraph_local_info *i = &target->local;
5915 if (i && i->local && i->can_change_signature)
5917 /* Refuse to produce wrong code when a local function with SSE enabled
5918 is called from an SSE-disabled function.
5919 We could work hard to handle these scenarios, but hopefully
5920 it does not matter in practice. */
5921 if (!TARGET_SSE && warn)
5923 error ("calling %qD with SSE caling convention without "
5924 "SSE/SSE2 enabled", decl);
5925 return 0;
5927 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
5928 ->x_ix86_isa_flags) ? 2 : 1;
5932 return 0;
5935 /* Return true if EAX is live at the start of the function. Used by
5936 ix86_expand_prologue to determine if we need special help before
5937 calling allocate_stack_worker. */
5939 static bool
5940 ix86_eax_live_at_start_p (void)
5942 /* Cheat. Don't bother working forward from ix86_function_regparm
5943 to the function type to whether an actual argument is located in
5944 eax. Instead just look at cfg info, which is still close enough
5945 to correct at this point. This gives false positives for broken
5946 functions that might use uninitialized data that happens to be
5947 allocated in eax, but who cares? */
5948 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5951 static bool
5952 ix86_keep_aggregate_return_pointer (tree fntype)
5954 tree attr;
5956 if (!TARGET_64BIT)
5958 attr = lookup_attribute ("callee_pop_aggregate_return",
5959 TYPE_ATTRIBUTES (fntype));
5960 if (attr)
5961 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5963 /* For 32-bit MS-ABI the default is to keep aggregate
5964 return pointer. */
5965 if (ix86_function_type_abi (fntype) == MS_ABI)
5966 return true;
5968 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5971 /* Value is the number of bytes of arguments automatically
5972 popped when returning from a subroutine call.
5973 FUNDECL is the declaration node of the function (as a tree),
5974 FUNTYPE is the data type of the function (as a tree),
5975 or for a library call it is an identifier node for the subroutine name.
5976 SIZE is the number of bytes of arguments passed on the stack.
5978 On the 80386, the RTD insn may be used to pop them if the number
5979 of args is fixed, but if the number is variable then the caller
5980 must pop them all. RTD can't be used for library calls now
5981 because the library is compiled with the Unix compiler.
5982 Use of RTD is a selectable option, since it is incompatible with
5983 standard Unix calling sequences. If the option is not selected,
5984 the caller must always pop the args.
5986 The attribute stdcall is equivalent to RTD on a per module basis. */
5988 static int
5989 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5991 unsigned int ccvt;
5993 /* None of the 64-bit ABIs pop arguments. */
5994 if (TARGET_64BIT)
5995 return 0;
5997 ccvt = ix86_get_callcvt (funtype);
5999 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6000 | IX86_CALLCVT_THISCALL)) != 0
6001 && ! stdarg_p (funtype))
6002 return size;
6004 /* Lose any fake structure return argument if it is passed on the stack. */
6005 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6006 && !ix86_keep_aggregate_return_pointer (funtype))
6008 int nregs = ix86_function_regparm (funtype, fundecl);
6009 if (nregs == 0)
6010 return GET_MODE_SIZE (Pmode);
6013 return 0;
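/* For example, a hypothetical 32-bit function

     void __attribute__ ((stdcall)) f (int a, int b);

   has 8 bytes of stack arguments, so the routine above returns 8 and the
   callee pops its own arguments (a "ret $8" return); a variadic or plain
   cdecl function returns 0 here, leaving the cleanup to the caller.  */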
6016 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
6018 static bool
6019 ix86_legitimate_combined_insn (rtx_insn *insn)
6021 /* Check operand constraints in case hard registers were propagated
6022 into the insn pattern. This check prevents the combine pass from
6023 generating insn patterns with invalid hard register operands.
6024 These invalid insns can eventually confuse reload into erroring out
6025 with a spill failure. See also PRs 46829 and 46843. */
6026 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
6028 int i;
6030 extract_insn (insn);
6031 preprocess_constraints (insn);
6033 int n_operands = recog_data.n_operands;
6034 int n_alternatives = recog_data.n_alternatives;
6035 for (i = 0; i < n_operands; i++)
6037 rtx op = recog_data.operand[i];
6038 machine_mode mode = GET_MODE (op);
6039 const operand_alternative *op_alt;
6040 int offset = 0;
6041 bool win;
6042 int j;
6044 /* For pre-AVX disallow unaligned loads/stores where the
6045 instructions don't support it. */
6046 if (!TARGET_AVX
6047 && VECTOR_MODE_P (GET_MODE (op))
6048 && misaligned_operand (op, GET_MODE (op)))
6050 int min_align = get_attr_ssememalign (insn);
6051 if (min_align == 0)
6052 return false;
6055 /* A unary operator may be accepted by the predicate, but it
6056 is irrelevant for matching constraints. */
6057 if (UNARY_P (op))
6058 op = XEXP (op, 0);
6060 if (GET_CODE (op) == SUBREG)
6062 if (REG_P (SUBREG_REG (op))
6063 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6064 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6065 GET_MODE (SUBREG_REG (op)),
6066 SUBREG_BYTE (op),
6067 GET_MODE (op));
6068 op = SUBREG_REG (op);
6071 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6072 continue;
6074 op_alt = recog_op_alt;
6076 /* Operand has no constraints, anything is OK. */
6077 win = !n_alternatives;
6079 alternative_mask preferred = get_preferred_alternatives (insn);
6080 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6082 if (!TEST_BIT (preferred, j))
6083 continue;
6084 if (op_alt[i].anything_ok
6085 || (op_alt[i].matches != -1
6086 && operands_match_p
6087 (recog_data.operand[i],
6088 recog_data.operand[op_alt[i].matches]))
6089 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6091 win = true;
6092 break;
6096 if (!win)
6097 return false;
6101 return true;
6104 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
6106 static unsigned HOST_WIDE_INT
6107 ix86_asan_shadow_offset (void)
6109 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6110 : HOST_WIDE_INT_C (0x7fff8000))
6111 : (HOST_WIDE_INT_1 << 29);
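/* Illustrative note: AddressSanitizer maps each 8 bytes of application
   memory to one shadow byte, so on 64-bit Linux a shadow address is
   roughly (addr >> 3) plus the 0x7fff8000 offset returned above; the
   Darwin and 32-bit cases use the other offsets shown.  */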
6114 /* Argument support functions. */
6116 /* Return true when register may be used to pass function parameters. */
6117 bool
6118 ix86_function_arg_regno_p (int regno)
6120 int i;
6121 const int *parm_regs;
6123 if (TARGET_MPX && BND_REGNO_P (regno))
6124 return true;
6126 if (!TARGET_64BIT)
6128 if (TARGET_MACHO)
6129 return (regno < REGPARM_MAX
6130 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6131 else
6132 return (regno < REGPARM_MAX
6133 || (TARGET_MMX && MMX_REGNO_P (regno)
6134 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6135 || (TARGET_SSE && SSE_REGNO_P (regno)
6136 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6139 if (TARGET_SSE && SSE_REGNO_P (regno)
6140 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6141 return true;
6143 /* TODO: The function should depend on current function ABI but
6144 builtins.c would need updating then. Therefore we use the
6145 default ABI. */
6147 /* RAX is used as hidden argument to va_arg functions. */
6148 if (ix86_abi == SYSV_ABI && regno == AX_REG)
6149 return true;
6151 if (ix86_abi == MS_ABI)
6152 parm_regs = x86_64_ms_abi_int_parameter_registers;
6153 else
6154 parm_regs = x86_64_int_parameter_registers;
6155 for (i = 0; i < (ix86_abi == MS_ABI
6156 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6157 if (regno == parm_regs[i])
6158 return true;
6159 return false;
6162 /* Return true if we do not know how to pass TYPE solely in registers. */
6164 static bool
6165 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6167 if (must_pass_in_stack_var_size_or_pad (mode, type))
6168 return true;
6170 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6171 The layout_type routine is crafty and tries to trick us into passing
6172 currently unsupported vector types on the stack by using TImode. */
6173 return (!TARGET_64BIT && mode == TImode
6174 && type && TREE_CODE (type) != VECTOR_TYPE);
6177 /* Return the size, in bytes, of the area reserved for arguments passed
6178 in registers for the function represented by FNDECL, which depends on
6179 the ABI used. */
6180 int
6181 ix86_reg_parm_stack_space (const_tree fndecl)
6183 enum calling_abi call_abi = SYSV_ABI;
6184 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6185 call_abi = ix86_function_abi (fndecl);
6186 else
6187 call_abi = ix86_function_type_abi (fndecl);
6188 if (TARGET_64BIT && call_abi == MS_ABI)
6189 return 32;
6190 return 0;
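/* Illustrative note: the 32 bytes returned for the 64-bit MS ABI are the
   "home" area the caller reserves on the stack for the four register
   parameters; the SYSV ABI reserves no such area, hence 0.  */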
6193 /* Returns SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
6194 call ABI used. */
6195 enum calling_abi
6196 ix86_function_type_abi (const_tree fntype)
6198 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6200 enum calling_abi abi = ix86_abi;
6201 if (abi == SYSV_ABI)
6203 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6205 if (TARGET_X32)
6207 static bool warned = false;
6208 if (!warned)
6210 error ("X32 does not support ms_abi attribute");
6211 warned = true;
6214 abi = MS_ABI;
6217 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6218 abi = SYSV_ABI;
6219 return abi;
6221 return ix86_abi;
6224 /* We add this as a workaround in order to use libc_has_function
6225 hook in i386.md. */
6226 bool
6227 ix86_libc_has_function (enum function_class fn_class)
6229 return targetm.libc_has_function (fn_class);
6232 static bool
6233 ix86_function_ms_hook_prologue (const_tree fn)
6235 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6237 if (decl_function_context (fn) != NULL_TREE)
6238 error_at (DECL_SOURCE_LOCATION (fn),
6239 "ms_hook_prologue is not compatible with nested function");
6240 else
6241 return true;
6243 return false;
6246 static enum calling_abi
6247 ix86_function_abi (const_tree fndecl)
6249 if (! fndecl)
6250 return ix86_abi;
6251 return ix86_function_type_abi (TREE_TYPE (fndecl));
6254 /* Returns SYSV_ABI or MS_ABI, depending on cfun, specifying the
6255 call ABI used. */
6256 enum calling_abi
6257 ix86_cfun_abi (void)
6259 if (! cfun)
6260 return ix86_abi;
6261 return cfun->machine->call_abi;
6264 /* Write the extra assembler code needed to declare a function properly. */
6266 void
6267 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6268 tree decl)
6270 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6272 if (is_ms_hook)
6274 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6275 unsigned int filler_cc = 0xcccccccc;
6277 for (i = 0; i < filler_count; i += 4)
6278 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6281 #ifdef SUBTARGET_ASM_UNWIND_INIT
6282 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6283 #endif
6285 ASM_OUTPUT_LABEL (asm_out_file, fname);
6287 /* Output magic byte marker, if hot-patch attribute is set. */
6288 if (is_ms_hook)
6290 if (TARGET_64BIT)
6292 /* leaq [%rsp + 0], %rsp */
6293 asm_fprintf (asm_out_file, ASM_BYTE
6294 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6296 else
6298 /* movl.s %edi, %edi
6299 push %ebp
6300 movl.s %esp, %ebp */
6301 asm_fprintf (asm_out_file, ASM_BYTE
6302 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6307 /* regclass.c */
6308 extern void init_regs (void);
6310 /* Implementation of the call ABI switching target hook. For FNDECL,
6311 the specific call register sets are selected. See also
6312 ix86_conditional_register_usage for more details. */
6313 void
6314 ix86_call_abi_override (const_tree fndecl)
6316 if (fndecl == NULL_TREE)
6317 cfun->machine->call_abi = ix86_abi;
6318 else
6319 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6322 /* 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6323 expensive re-initialization of init_regs each time we switch function context
6324 since this is needed only during RTL expansion. */
6325 static void
6326 ix86_maybe_switch_abi (void)
6328 if (TARGET_64BIT &&
6329 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6330 reinit_regs ();
6333 /* Return 1 if pseudo register should be created and used to hold
6334 GOT address for PIC code. */
6335 bool
6336 ix86_use_pseudo_pic_reg (void)
6338 if ((TARGET_64BIT
6339 && (ix86_cmodel == CM_SMALL_PIC
6340 || TARGET_PECOFF))
6341 || !flag_pic)
6342 return false;
6343 return true;
6346 /* Initialize large model PIC register. */
6348 static void
6349 ix86_init_large_pic_reg (unsigned int tmp_regno)
6351 rtx_code_label *label;
6352 rtx tmp_reg;
6354 gcc_assert (Pmode == DImode);
6355 label = gen_label_rtx ();
6356 emit_label (label);
6357 LABEL_PRESERVE_P (label) = 1;
6358 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6359 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6360 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6361 label));
6362 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6363 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6364 pic_offset_table_rtx, tmp_reg));
6367 /* Create and initialize PIC register if required. */
6368 static void
6369 ix86_init_pic_reg (void)
6371 edge entry_edge;
6372 rtx_insn *seq;
6374 if (!ix86_use_pseudo_pic_reg ())
6375 return;
6377 start_sequence ();
6379 if (TARGET_64BIT)
6381 if (ix86_cmodel == CM_LARGE_PIC)
6382 ix86_init_large_pic_reg (R11_REG);
6383 else
6384 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6386 else
6388 /* If there is a future mcount call in the function, it is more profitable
6389 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
6390 rtx reg = crtl->profile
6391 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6392 : pic_offset_table_rtx;
6393 rtx insn = emit_insn (gen_set_got (reg));
6394 RTX_FRAME_RELATED_P (insn) = 1;
6395 if (crtl->profile)
6396 emit_move_insn (pic_offset_table_rtx, reg);
6397 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6400 seq = get_insns ();
6401 end_sequence ();
6403 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6404 insert_insn_on_edge (seq, entry_edge);
6405 commit_one_edge_insertion (entry_edge);
6408 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6409 for a call to a function whose data type is FNTYPE.
6410 For a library call, FNTYPE is 0. */
6412 void
6413 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6414 tree fntype, /* tree ptr for function decl */
6415 rtx libname, /* SYMBOL_REF of library name or 0 */
6416 tree fndecl,
6417 int caller)
6419 struct cgraph_local_info *i = NULL;
6420 struct cgraph_node *target = NULL;
6422 memset (cum, 0, sizeof (*cum));
6424 if (fndecl)
6426 target = cgraph_node::get (fndecl);
6427 if (target)
6429 target = target->function_symbol ();
6430 i = cgraph_node::local_info (target->decl);
6431 cum->call_abi = ix86_function_abi (target->decl);
6433 else
6434 cum->call_abi = ix86_function_abi (fndecl);
6436 else
6437 cum->call_abi = ix86_function_type_abi (fntype);
6439 cum->caller = caller;
6441 /* Set up the number of registers to use for passing arguments. */
6442 cum->nregs = ix86_regparm;
6443 if (TARGET_64BIT)
6445 cum->nregs = (cum->call_abi == SYSV_ABI
6446 ? X86_64_REGPARM_MAX
6447 : X86_64_MS_REGPARM_MAX);
6449 if (TARGET_SSE)
6451 cum->sse_nregs = SSE_REGPARM_MAX;
6452 if (TARGET_64BIT)
6454 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6455 ? X86_64_SSE_REGPARM_MAX
6456 : X86_64_MS_SSE_REGPARM_MAX);
6459 if (TARGET_MMX)
6460 cum->mmx_nregs = MMX_REGPARM_MAX;
6461 cum->warn_avx512f = true;
6462 cum->warn_avx = true;
6463 cum->warn_sse = true;
6464 cum->warn_mmx = true;
6466 /* Because the type might mismatch between caller and callee, we need to
6467 use the actual type of the function for local calls.
6468 FIXME: cgraph_analyze can be told to actually record whether a function
6469 uses va_start, so for local functions maybe_vaarg can be made more
6470 aggressive, helping K&R code.
6471 FIXME: once the type system is fixed, we won't need this code anymore. */
6472 if (i && i->local && i->can_change_signature)
6473 fntype = TREE_TYPE (target->decl);
6474 cum->stdarg = stdarg_p (fntype);
6475 cum->maybe_vaarg = (fntype
6476 ? (!prototype_p (fntype) || stdarg_p (fntype))
6477 : !libname);
6479 cum->bnd_regno = FIRST_BND_REG;
6480 cum->bnds_in_bt = 0;
6481 cum->force_bnd_pass = 0;
6483 if (!TARGET_64BIT)
6485 /* If there are variable arguments, then we won't pass anything
6486 in registers in 32-bit mode. */
6487 if (stdarg_p (fntype))
6489 cum->nregs = 0;
6490 cum->sse_nregs = 0;
6491 cum->mmx_nregs = 0;
6492 cum->warn_avx512f = false;
6493 cum->warn_avx = false;
6494 cum->warn_sse = false;
6495 cum->warn_mmx = false;
6496 return;
6499 /* Use ecx and edx registers if function has fastcall attribute,
6500 else look for regparm information. */
6501 if (fntype)
6503 unsigned int ccvt = ix86_get_callcvt (fntype);
6504 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6506 cum->nregs = 1;
6507 cum->fastcall = 1; /* Same first register as in fastcall. */
6509 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6511 cum->nregs = 2;
6512 cum->fastcall = 1;
6514 else
6515 cum->nregs = ix86_function_regparm (fntype, fndecl);
6518 /* Set up the number of SSE registers used for passing SFmode
6519 and DFmode arguments. Warn for mismatching ABI. */
6520 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6524 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6525 But in the case of vector types, it is some vector mode.
6527 When we have only some of our vector isa extensions enabled, then there
6528 are some modes for which vector_mode_supported_p is false. For these
6529 modes, the generic vector support in gcc will choose some non-vector mode
6530 in order to implement the type. By computing the natural mode, we'll
6531 select the proper ABI location for the operand and not depend on whatever
6532 the middle-end decides to do with these vector types.
6534 The middle-end can't deal with vector types > 16 bytes. In this
6535 case, we return the original mode and warn about the ABI change if
6536 CUM isn't NULL.
6538 If IN_RETURN is true, warn about the ABI change if the vector mode
6539 isn't available for the function return value. */
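/* For example: a generic 16-byte vector type such as
     typedef float v4sf __attribute__ ((vector_size (16)));
   gets natural mode V4SFmode here even when SSE is disabled and the
   middle-end has therefore lowered the type to a non-vector mode; in that
   case the !TARGET_SSE path below warns about the psABI change but still
   returns the vector mode, so the proper ABI slot is selected.  */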
6541 static machine_mode
6542 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6543 bool in_return)
6545 machine_mode mode = TYPE_MODE (type);
6547 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6549 HOST_WIDE_INT size = int_size_in_bytes (type);
6550 if ((size == 8 || size == 16 || size == 32 || size == 64)
6551 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6552 && TYPE_VECTOR_SUBPARTS (type) > 1)
6554 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6556 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6557 mode = MIN_MODE_VECTOR_FLOAT;
6558 else
6559 mode = MIN_MODE_VECTOR_INT;
6561 /* Get the mode which has this inner mode and number of units. */
6562 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6563 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6564 && GET_MODE_INNER (mode) == innermode)
6566 if (size == 64 && !TARGET_AVX512F)
6568 static bool warnedavx512f;
6569 static bool warnedavx512f_ret;
6571 if (cum && cum->warn_avx512f && !warnedavx512f)
6573 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6574 "without AVX512F enabled changes the ABI"))
6575 warnedavx512f = true;
6577 else if (in_return && !warnedavx512f_ret)
6579 if (warning (OPT_Wpsabi, "AVX512F vector return "
6580 "without AVX512F enabled changes the ABI"))
6581 warnedavx512f_ret = true;
6584 return TYPE_MODE (type);
6586 else if (size == 32 && !TARGET_AVX)
6588 static bool warnedavx;
6589 static bool warnedavx_ret;
6591 if (cum && cum->warn_avx && !warnedavx)
6593 if (warning (OPT_Wpsabi, "AVX vector argument "
6594 "without AVX enabled changes the ABI"))
6595 warnedavx = true;
6597 else if (in_return && !warnedavx_ret)
6599 if (warning (OPT_Wpsabi, "AVX vector return "
6600 "without AVX enabled changes the ABI"))
6601 warnedavx_ret = true;
6604 return TYPE_MODE (type);
6606 else if (((size == 8 && TARGET_64BIT) || size == 16)
6607 && !TARGET_SSE)
6609 static bool warnedsse;
6610 static bool warnedsse_ret;
6612 if (cum && cum->warn_sse && !warnedsse)
6614 if (warning (OPT_Wpsabi, "SSE vector argument "
6615 "without SSE enabled changes the ABI"))
6616 warnedsse = true;
6618 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6620 if (warning (OPT_Wpsabi, "SSE vector return "
6621 "without SSE enabled changes the ABI"))
6622 warnedsse_ret = true;
6625 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6627 static bool warnedmmx;
6628 static bool warnedmmx_ret;
6630 if (cum && cum->warn_mmx && !warnedmmx)
6632 if (warning (OPT_Wpsabi, "MMX vector argument "
6633 "without MMX enabled changes the ABI"))
6634 warnedmmx = true;
6636 else if (in_return && !warnedmmx_ret)
6638 if (warning (OPT_Wpsabi, "MMX vector return "
6639 "without MMX enabled changes the ABI"))
6640 warnedmmx_ret = true;
6643 return mode;
6646 gcc_unreachable ();
6650 return mode;
6653 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6654 this may not agree with the mode that the type system has chosen for the
6655 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6656 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6658 static rtx
6659 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6660 unsigned int regno)
6662 rtx tmp;
6664 if (orig_mode != BLKmode)
6665 tmp = gen_rtx_REG (orig_mode, regno);
6666 else
6668 tmp = gen_rtx_REG (mode, regno);
6669 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6670 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6673 return tmp;
6676 /* x86-64 register passing implementation. See the x86-64 ABI for details.
6677 The goal of this code is to classify each eightbyte of an incoming argument
6678 by register class and assign registers accordingly. */
6680 /* Return the union class of CLASS1 and CLASS2.
6681 See the x86-64 PS ABI for details. */
6683 static enum x86_64_reg_class
6684 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6686 /* Rule #1: If both classes are equal, this is the resulting class. */
6687 if (class1 == class2)
6688 return class1;
6690 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6691 the other class. */
6692 if (class1 == X86_64_NO_CLASS)
6693 return class2;
6694 if (class2 == X86_64_NO_CLASS)
6695 return class1;
6697 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6698 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6699 return X86_64_MEMORY_CLASS;
6701 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6702 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6703 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6704 return X86_64_INTEGERSI_CLASS;
6705 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6706 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6707 return X86_64_INTEGER_CLASS;
6709 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6710 MEMORY is used. */
6711 if (class1 == X86_64_X87_CLASS
6712 || class1 == X86_64_X87UP_CLASS
6713 || class1 == X86_64_COMPLEX_X87_CLASS
6714 || class2 == X86_64_X87_CLASS
6715 || class2 == X86_64_X87UP_CLASS
6716 || class2 == X86_64_COMPLEX_X87_CLASS)
6717 return X86_64_MEMORY_CLASS;
6719 /* Rule #6: Otherwise class SSE is used. */
6720 return X86_64_SSE_CLASS;
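/* A few illustrative merges under the rules above:
     merge_classes (X86_64_INTEGER_CLASS, X86_64_SSE_CLASS)
       = X86_64_INTEGER_CLASS	(rule #4)
     merge_classes (X86_64_SSESF_CLASS, X86_64_SSEDF_CLASS)
       = X86_64_SSE_CLASS	(rule #6)
     merge_classes (X86_64_X87_CLASS, X86_64_SSE_CLASS)
       = X86_64_MEMORY_CLASS	(rule #5)  */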
6723 /* Classify the argument of type TYPE and mode MODE.
6724 CLASSES will be filled by the register class used to pass each word
6725 of the operand. The number of words is returned. In case the parameter
6726 should be passed in memory, 0 is returned. As a special case for zero
6727 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6729 BIT_OFFSET is used internally for handling records; it specifies the
6730 offset in bits modulo 512, to avoid overflow cases.
6732 See the x86-64 PS ABI for details.
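/* For instance, on x86-64
     struct s { long l; double d; };
   occupies two eightbytes and is classified as
     classes[0] = X86_64_INTEGER_CLASS, classes[1] = X86_64_SSEDF_CLASS,
   so 2 is returned and the struct is passed in one integer register and
   one SSE register.  */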
6735 static int
6736 classify_argument (machine_mode mode, const_tree type,
6737 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6739 HOST_WIDE_INT bytes =
6740 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6741 int words
6742 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6744 /* Variable sized entities are always passed/returned in memory. */
6745 if (bytes < 0)
6746 return 0;
6748 if (mode != VOIDmode
6749 && targetm.calls.must_pass_in_stack (mode, type))
6750 return 0;
6752 if (type && AGGREGATE_TYPE_P (type))
6754 int i;
6755 tree field;
6756 enum x86_64_reg_class subclasses[MAX_CLASSES];
6758 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6759 if (bytes > 64)
6760 return 0;
6762 for (i = 0; i < words; i++)
6763 classes[i] = X86_64_NO_CLASS;
6765 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6766 signal the memory class, so handle this as a special case. */
6767 if (!words)
6769 classes[0] = X86_64_NO_CLASS;
6770 return 1;
6773 /* Classify each field of record and merge classes. */
6774 switch (TREE_CODE (type))
6776 case RECORD_TYPE:
6777 /* And now merge the fields of structure. */
6778 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6780 if (TREE_CODE (field) == FIELD_DECL)
6782 int num;
6784 if (TREE_TYPE (field) == error_mark_node)
6785 continue;
6787 /* Bitfields are always classified as integer. Handle them
6788 early, since later code would consider them to be
6789 misaligned integers. */
6790 if (DECL_BIT_FIELD (field))
6792 for (i = (int_bit_position (field)
6793 + (bit_offset % 64)) / 8 / 8;
6794 i < ((int_bit_position (field) + (bit_offset % 64))
6795 + tree_to_shwi (DECL_SIZE (field))
6796 + 63) / 8 / 8; i++)
6797 classes[i] =
6798 merge_classes (X86_64_INTEGER_CLASS,
6799 classes[i]);
6801 else
6803 int pos;
6805 type = TREE_TYPE (field);
6807 /* Flexible array member is ignored. */
6808 if (TYPE_MODE (type) == BLKmode
6809 && TREE_CODE (type) == ARRAY_TYPE
6810 && TYPE_SIZE (type) == NULL_TREE
6811 && TYPE_DOMAIN (type) != NULL_TREE
6812 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6813 == NULL_TREE))
6815 static bool warned;
6817 if (!warned && warn_psabi)
6819 warned = true;
6820 inform (input_location,
6821 "the ABI of passing struct with"
6822 " a flexible array member has"
6823 " changed in GCC 4.4");
6825 continue;
6827 num = classify_argument (TYPE_MODE (type), type,
6828 subclasses,
6829 (int_bit_position (field)
6830 + bit_offset) % 512);
6831 if (!num)
6832 return 0;
6833 pos = (int_bit_position (field)
6834 + (bit_offset % 64)) / 8 / 8;
6835 for (i = 0; i < num && (i + pos) < words; i++)
6836 classes[i + pos] =
6837 merge_classes (subclasses[i], classes[i + pos]);
6841 break;
6843 case ARRAY_TYPE:
6844 /* Arrays are handled as small records. */
6846 int num;
6847 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6848 TREE_TYPE (type), subclasses, bit_offset);
6849 if (!num)
6850 return 0;
6852 /* The partial classes are now full classes. */
6853 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6854 subclasses[0] = X86_64_SSE_CLASS;
6855 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6856 && !((bit_offset % 64) == 0 && bytes == 4))
6857 subclasses[0] = X86_64_INTEGER_CLASS;
6859 for (i = 0; i < words; i++)
6860 classes[i] = subclasses[i % num];
6862 break;
6864 case UNION_TYPE:
6865 case QUAL_UNION_TYPE:
6866 /* Unions are similar to RECORD_TYPE but offset is always 0.
6868 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6870 if (TREE_CODE (field) == FIELD_DECL)
6872 int num;
6874 if (TREE_TYPE (field) == error_mark_node)
6875 continue;
6877 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6878 TREE_TYPE (field), subclasses,
6879 bit_offset);
6880 if (!num)
6881 return 0;
6882 for (i = 0; i < num && i < words; i++)
6883 classes[i] = merge_classes (subclasses[i], classes[i]);
6886 break;
6888 default:
6889 gcc_unreachable ();
6892 if (words > 2)
6894 /* When size > 16 bytes, if the first eightbyte isn't
6895 X86_64_SSE_CLASS or any of the remaining ones isn't
6896 X86_64_SSEUP_CLASS, everything should be passed in
6897 memory. */
6898 if (classes[0] != X86_64_SSE_CLASS)
6899 return 0;
6901 for (i = 1; i < words; i++)
6902 if (classes[i] != X86_64_SSEUP_CLASS)
6903 return 0;
6906 /* Final merger cleanup. */
6907 for (i = 0; i < words; i++)
6909 /* If one class is MEMORY, everything should be passed in
6910 memory. */
6911 if (classes[i] == X86_64_MEMORY_CLASS)
6912 return 0;
6914 /* The X86_64_SSEUP_CLASS should be always preceded by
6915 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6916 if (classes[i] == X86_64_SSEUP_CLASS
6917 && classes[i - 1] != X86_64_SSE_CLASS
6918 && classes[i - 1] != X86_64_SSEUP_CLASS)
6920 /* The first one should never be X86_64_SSEUP_CLASS. */
6921 gcc_assert (i != 0);
6922 classes[i] = X86_64_SSE_CLASS;
6925 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6926 everything should be passed in memory. */
6927 if (classes[i] == X86_64_X87UP_CLASS
6928 && (classes[i - 1] != X86_64_X87_CLASS))
6930 static bool warned;
6932 /* The first one should never be X86_64_X87UP_CLASS. */
6933 gcc_assert (i != 0);
6934 if (!warned && warn_psabi)
6936 warned = true;
6937 inform (input_location,
6938 "the ABI of passing union with long double"
6939 " has changed in GCC 4.4");
6941 return 0;
6944 return words;
6947 /* Compute the alignment needed. We align all types to their natural
6948 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
6949 if (mode != VOIDmode && mode != BLKmode)
6951 int mode_alignment = GET_MODE_BITSIZE (mode);
6953 if (mode == XFmode)
6954 mode_alignment = 128;
6955 else if (mode == XCmode)
6956 mode_alignment = 256;
6957 if (COMPLEX_MODE_P (mode))
6958 mode_alignment /= 2;
6959 /* Misaligned fields are always returned in memory. */
6960 if (bit_offset % mode_alignment)
6961 return 0;
6964 /* for V1xx modes, just use the base mode */
6965 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6966 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6967 mode = GET_MODE_INNER (mode);
6969 /* Classification of atomic types. */
6970 switch (mode)
6972 case SDmode:
6973 case DDmode:
6974 classes[0] = X86_64_SSE_CLASS;
6975 return 1;
6976 case TDmode:
6977 classes[0] = X86_64_SSE_CLASS;
6978 classes[1] = X86_64_SSEUP_CLASS;
6979 return 2;
6980 case DImode:
6981 case SImode:
6982 case HImode:
6983 case QImode:
6984 case CSImode:
6985 case CHImode:
6986 case CQImode:
6988 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6990 /* Analyze last 128 bits only. */
6991 size = (size - 1) & 0x7f;
6993 if (size < 32)
6995 classes[0] = X86_64_INTEGERSI_CLASS;
6996 return 1;
6998 else if (size < 64)
7000 classes[0] = X86_64_INTEGER_CLASS;
7001 return 1;
7003 else if (size < 64+32)
7005 classes[0] = X86_64_INTEGER_CLASS;
7006 classes[1] = X86_64_INTEGERSI_CLASS;
7007 return 2;
7009 else if (size < 64+64)
7011 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7012 return 2;
7014 else
7015 gcc_unreachable ();
7017 case CDImode:
7018 case TImode:
7019 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7020 return 2;
7021 case COImode:
7022 case OImode:
7023 /* OImode shouldn't be used directly. */
7024 gcc_unreachable ();
7025 case CTImode:
7026 return 0;
7027 case SFmode:
7028 if (!(bit_offset % 64))
7029 classes[0] = X86_64_SSESF_CLASS;
7030 else
7031 classes[0] = X86_64_SSE_CLASS;
7032 return 1;
7033 case DFmode:
7034 classes[0] = X86_64_SSEDF_CLASS;
7035 return 1;
7036 case XFmode:
7037 classes[0] = X86_64_X87_CLASS;
7038 classes[1] = X86_64_X87UP_CLASS;
7039 return 2;
7040 case TFmode:
7041 classes[0] = X86_64_SSE_CLASS;
7042 classes[1] = X86_64_SSEUP_CLASS;
7043 return 2;
7044 case SCmode:
7045 classes[0] = X86_64_SSE_CLASS;
7046 if (!(bit_offset % 64))
7047 return 1;
7048 else
7050 static bool warned;
7052 if (!warned && warn_psabi)
7054 warned = true;
7055 inform (input_location,
7056 "the ABI of passing structure with complex float"
7057 " member has changed in GCC 4.4");
7059 classes[1] = X86_64_SSESF_CLASS;
7060 return 2;
7062 case DCmode:
7063 classes[0] = X86_64_SSEDF_CLASS;
7064 classes[1] = X86_64_SSEDF_CLASS;
7065 return 2;
7066 case XCmode:
7067 classes[0] = X86_64_COMPLEX_X87_CLASS;
7068 return 1;
7069 case TCmode:
7070 /* This mode is larger than 16 bytes. */
7071 return 0;
7072 case V8SFmode:
7073 case V8SImode:
7074 case V32QImode:
7075 case V16HImode:
7076 case V4DFmode:
7077 case V4DImode:
7078 classes[0] = X86_64_SSE_CLASS;
7079 classes[1] = X86_64_SSEUP_CLASS;
7080 classes[2] = X86_64_SSEUP_CLASS;
7081 classes[3] = X86_64_SSEUP_CLASS;
7082 return 4;
7083 case V8DFmode:
7084 case V16SFmode:
7085 case V8DImode:
7086 case V16SImode:
7087 case V32HImode:
7088 case V64QImode:
7089 classes[0] = X86_64_SSE_CLASS;
7090 classes[1] = X86_64_SSEUP_CLASS;
7091 classes[2] = X86_64_SSEUP_CLASS;
7092 classes[3] = X86_64_SSEUP_CLASS;
7093 classes[4] = X86_64_SSEUP_CLASS;
7094 classes[5] = X86_64_SSEUP_CLASS;
7095 classes[6] = X86_64_SSEUP_CLASS;
7096 classes[7] = X86_64_SSEUP_CLASS;
7097 return 8;
7098 case V4SFmode:
7099 case V4SImode:
7100 case V16QImode:
7101 case V8HImode:
7102 case V2DFmode:
7103 case V2DImode:
7104 classes[0] = X86_64_SSE_CLASS;
7105 classes[1] = X86_64_SSEUP_CLASS;
7106 return 2;
7107 case V1TImode:
7108 case V1DImode:
7109 case V2SFmode:
7110 case V2SImode:
7111 case V4HImode:
7112 case V8QImode:
7113 classes[0] = X86_64_SSE_CLASS;
7114 return 1;
7115 case BLKmode:
7116 case VOIDmode:
7117 return 0;
7118 default:
7119 gcc_assert (VECTOR_MODE_P (mode));
7121 if (bytes > 16)
7122 return 0;
7124 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7126 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7127 classes[0] = X86_64_INTEGERSI_CLASS;
7128 else
7129 classes[0] = X86_64_INTEGER_CLASS;
7130 classes[1] = X86_64_INTEGER_CLASS;
7131 return 1 + (bytes > 8);
7135 /* Examine the argument and set the number of registers required in each
7136 class. Return true iff the parameter should be passed in memory. */
7138 static bool
7139 examine_argument (machine_mode mode, const_tree type, int in_return,
7140 int *int_nregs, int *sse_nregs)
7142 enum x86_64_reg_class regclass[MAX_CLASSES];
7143 int n = classify_argument (mode, type, regclass, 0);
7145 *int_nregs = 0;
7146 *sse_nregs = 0;
7148 if (!n)
7149 return true;
7150 for (n--; n >= 0; n--)
7151 switch (regclass[n])
7153 case X86_64_INTEGER_CLASS:
7154 case X86_64_INTEGERSI_CLASS:
7155 (*int_nregs)++;
7156 break;
7157 case X86_64_SSE_CLASS:
7158 case X86_64_SSESF_CLASS:
7159 case X86_64_SSEDF_CLASS:
7160 (*sse_nregs)++;
7161 break;
7162 case X86_64_NO_CLASS:
7163 case X86_64_SSEUP_CLASS:
7164 break;
7165 case X86_64_X87_CLASS:
7166 case X86_64_X87UP_CLASS:
7167 case X86_64_COMPLEX_X87_CLASS:
7168 if (!in_return)
7169 return true;
7170 break;
7171 case X86_64_MEMORY_CLASS:
7172 gcc_unreachable ();
7175 return false;
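/* Continuing the struct s { long l; double d; } example: examine_argument
   sets *INT_NREGS = 1 and *SSE_NREGS = 1 and returns false, i.e. the
   struct is passed in registers.  */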
7178 /* Construct container for the argument used by GCC interface. See
7179 FUNCTION_ARG for the detailed description. */
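/* A sketch (not verbatim RTL) of the container built for the struct from
   the example above when it is passed as the first argument:

     (parallel [(expr_list (reg:DI di) (const_int 0))
		(expr_list (reg:DF xmm0) (const_int 8))])

   i.e. the low eightbyte travels in %rdi and the high one in %xmm0.  */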
7181 static rtx
7182 construct_container (machine_mode mode, machine_mode orig_mode,
7183 const_tree type, int in_return, int nintregs, int nsseregs,
7184 const int *intreg, int sse_regno)
7186 /* The following variables hold the static issued_error state. */
7187 static bool issued_sse_arg_error;
7188 static bool issued_sse_ret_error;
7189 static bool issued_x87_ret_error;
7191 machine_mode tmpmode;
7192 int bytes =
7193 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7194 enum x86_64_reg_class regclass[MAX_CLASSES];
7195 int n;
7196 int i;
7197 int nexps = 0;
7198 int needed_sseregs, needed_intregs;
7199 rtx exp[MAX_CLASSES];
7200 rtx ret;
7202 n = classify_argument (mode, type, regclass, 0);
7203 if (!n)
7204 return NULL;
7205 if (examine_argument (mode, type, in_return, &needed_intregs,
7206 &needed_sseregs))
7207 return NULL;
7208 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7209 return NULL;
7211 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7212 some less clueful developer tries to use floating-point anyway. */
7213 if (needed_sseregs && !TARGET_SSE)
7215 if (in_return)
7217 if (!issued_sse_ret_error)
7219 error ("SSE register return with SSE disabled");
7220 issued_sse_ret_error = true;
7223 else if (!issued_sse_arg_error)
7225 error ("SSE register argument with SSE disabled");
7226 issued_sse_arg_error = true;
7228 return NULL;
7231 /* Likewise, error if the ABI requires us to return values in the
7232 x87 registers and the user specified -mno-80387. */
7233 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7234 for (i = 0; i < n; i++)
7235 if (regclass[i] == X86_64_X87_CLASS
7236 || regclass[i] == X86_64_X87UP_CLASS
7237 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7239 if (!issued_x87_ret_error)
7241 error ("x87 register return with x87 disabled");
7242 issued_x87_ret_error = true;
7244 return NULL;
7247 /* First construct simple cases. Avoid SCmode, since we want to use
7248 single register to pass this type. */
7249 if (n == 1 && mode != SCmode)
7250 switch (regclass[0])
7252 case X86_64_INTEGER_CLASS:
7253 case X86_64_INTEGERSI_CLASS:
7254 return gen_rtx_REG (mode, intreg[0]);
7255 case X86_64_SSE_CLASS:
7256 case X86_64_SSESF_CLASS:
7257 case X86_64_SSEDF_CLASS:
7258 if (mode != BLKmode)
7259 return gen_reg_or_parallel (mode, orig_mode,
7260 SSE_REGNO (sse_regno));
7261 break;
7262 case X86_64_X87_CLASS:
7263 case X86_64_COMPLEX_X87_CLASS:
7264 return gen_rtx_REG (mode, FIRST_STACK_REG);
7265 case X86_64_NO_CLASS:
7266 /* Zero sized array, struct or class. */
7267 return NULL;
7268 default:
7269 gcc_unreachable ();
7271 if (n == 2
7272 && regclass[0] == X86_64_SSE_CLASS
7273 && regclass[1] == X86_64_SSEUP_CLASS
7274 && mode != BLKmode)
7275 return gen_reg_or_parallel (mode, orig_mode,
7276 SSE_REGNO (sse_regno));
7277 if (n == 4
7278 && regclass[0] == X86_64_SSE_CLASS
7279 && regclass[1] == X86_64_SSEUP_CLASS
7280 && regclass[2] == X86_64_SSEUP_CLASS
7281 && regclass[3] == X86_64_SSEUP_CLASS
7282 && mode != BLKmode)
7283 return gen_reg_or_parallel (mode, orig_mode,
7284 SSE_REGNO (sse_regno));
7285 if (n == 8
7286 && regclass[0] == X86_64_SSE_CLASS
7287 && regclass[1] == X86_64_SSEUP_CLASS
7288 && regclass[2] == X86_64_SSEUP_CLASS
7289 && regclass[3] == X86_64_SSEUP_CLASS
7290 && regclass[4] == X86_64_SSEUP_CLASS
7291 && regclass[5] == X86_64_SSEUP_CLASS
7292 && regclass[6] == X86_64_SSEUP_CLASS
7293 && regclass[7] == X86_64_SSEUP_CLASS
7294 && mode != BLKmode)
7295 return gen_reg_or_parallel (mode, orig_mode,
7296 SSE_REGNO (sse_regno));
7297 if (n == 2
7298 && regclass[0] == X86_64_X87_CLASS
7299 && regclass[1] == X86_64_X87UP_CLASS)
7300 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7302 if (n == 2
7303 && regclass[0] == X86_64_INTEGER_CLASS
7304 && regclass[1] == X86_64_INTEGER_CLASS
7305 && (mode == CDImode || mode == TImode)
7306 && intreg[0] + 1 == intreg[1])
7307 return gen_rtx_REG (mode, intreg[0]);
7309 /* Otherwise figure out the entries of the PARALLEL. */
7310 for (i = 0; i < n; i++)
7312 int pos;
7314 switch (regclass[i])
7316 case X86_64_NO_CLASS:
7317 break;
7318 case X86_64_INTEGER_CLASS:
7319 case X86_64_INTEGERSI_CLASS:
7320 /* Merge TImodes on aligned occasions here too. */
7321 if (i * 8 + 8 > bytes)
7322 tmpmode
7323 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7324 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7325 tmpmode = SImode;
7326 else
7327 tmpmode = DImode;
7328 /* We've requested 24 bytes, which we
7329 don't have a mode for. Use DImode. */
7330 if (tmpmode == BLKmode)
7331 tmpmode = DImode;
7332 exp [nexps++]
7333 = gen_rtx_EXPR_LIST (VOIDmode,
7334 gen_rtx_REG (tmpmode, *intreg),
7335 GEN_INT (i*8));
7336 intreg++;
7337 break;
7338 case X86_64_SSESF_CLASS:
7339 exp [nexps++]
7340 = gen_rtx_EXPR_LIST (VOIDmode,
7341 gen_rtx_REG (SFmode,
7342 SSE_REGNO (sse_regno)),
7343 GEN_INT (i*8));
7344 sse_regno++;
7345 break;
7346 case X86_64_SSEDF_CLASS:
7347 exp [nexps++]
7348 = gen_rtx_EXPR_LIST (VOIDmode,
7349 gen_rtx_REG (DFmode,
7350 SSE_REGNO (sse_regno)),
7351 GEN_INT (i*8));
7352 sse_regno++;
7353 break;
7354 case X86_64_SSE_CLASS:
7355 pos = i;
7356 switch (n)
7358 case 1:
7359 tmpmode = DImode;
7360 break;
7361 case 2:
7362 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7364 tmpmode = TImode;
7365 i++;
7367 else
7368 tmpmode = DImode;
7369 break;
7370 case 4:
7371 gcc_assert (i == 0
7372 && regclass[1] == X86_64_SSEUP_CLASS
7373 && regclass[2] == X86_64_SSEUP_CLASS
7374 && regclass[3] == X86_64_SSEUP_CLASS);
7375 tmpmode = OImode;
7376 i += 3;
7377 break;
7378 case 8:
7379 gcc_assert (i == 0
7380 && regclass[1] == X86_64_SSEUP_CLASS
7381 && regclass[2] == X86_64_SSEUP_CLASS
7382 && regclass[3] == X86_64_SSEUP_CLASS
7383 && regclass[4] == X86_64_SSEUP_CLASS
7384 && regclass[5] == X86_64_SSEUP_CLASS
7385 && regclass[6] == X86_64_SSEUP_CLASS
7386 && regclass[7] == X86_64_SSEUP_CLASS);
7387 tmpmode = XImode;
7388 i += 7;
7389 break;
7390 default:
7391 gcc_unreachable ();
7393 exp [nexps++]
7394 = gen_rtx_EXPR_LIST (VOIDmode,
7395 gen_rtx_REG (tmpmode,
7396 SSE_REGNO (sse_regno)),
7397 GEN_INT (pos*8));
7398 sse_regno++;
7399 break;
7400 default:
7401 gcc_unreachable ();
7405 /* Empty aligned struct, union or class. */
7406 if (nexps == 0)
7407 return NULL;
7409 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7410 for (i = 0; i < nexps; i++)
7411 XVECEXP (ret, 0, i) = exp [i];
7412 return ret;
7415 /* Update the data in CUM to advance over an argument of mode MODE
7416 and data type TYPE. (TYPE is null for libcalls where that information
7417 may not be available.)
7419 Return the number of integer registers advanced over. */
7421 static int
7422 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7423 const_tree type, HOST_WIDE_INT bytes,
7424 HOST_WIDE_INT words)
7426 int res = 0;
7428 switch (mode)
7430 default:
7431 break;
7433 case BLKmode:
7434 if (bytes < 0)
7435 break;
7436 /* FALLTHRU */
7438 case DImode:
7439 case SImode:
7440 case HImode:
7441 case QImode:
7442 cum->words += words;
7443 cum->nregs -= words;
7444 cum->regno += words;
7445 if (cum->nregs >= 0)
7446 res = words;
7447 if (cum->nregs <= 0)
7449 cum->nregs = 0;
7450 cum->regno = 0;
7452 break;
7454 case OImode:
7455 /* OImode shouldn't be used directly. */
7456 gcc_unreachable ();
7458 case DFmode:
7459 if (cum->float_in_sse < 2)
7460 break;
7461 case SFmode:
7462 if (cum->float_in_sse < 1)
7463 break;
7464 /* FALLTHRU */
7466 case V8SFmode:
7467 case V8SImode:
7468 case V64QImode:
7469 case V32HImode:
7470 case V16SImode:
7471 case V8DImode:
7472 case V16SFmode:
7473 case V8DFmode:
7474 case V32QImode:
7475 case V16HImode:
7476 case V4DFmode:
7477 case V4DImode:
7478 case TImode:
7479 case V16QImode:
7480 case V8HImode:
7481 case V4SImode:
7482 case V2DImode:
7483 case V4SFmode:
7484 case V2DFmode:
7485 if (!type || !AGGREGATE_TYPE_P (type))
7487 cum->sse_words += words;
7488 cum->sse_nregs -= 1;
7489 cum->sse_regno += 1;
7490 if (cum->sse_nregs <= 0)
7492 cum->sse_nregs = 0;
7493 cum->sse_regno = 0;
7496 break;
7498 case V8QImode:
7499 case V4HImode:
7500 case V2SImode:
7501 case V2SFmode:
7502 case V1TImode:
7503 case V1DImode:
7504 if (!type || !AGGREGATE_TYPE_P (type))
7506 cum->mmx_words += words;
7507 cum->mmx_nregs -= 1;
7508 cum->mmx_regno += 1;
7509 if (cum->mmx_nregs <= 0)
7511 cum->mmx_nregs = 0;
7512 cum->mmx_regno = 0;
7515 break;
7518 return res;
7521 static int
7522 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7523 const_tree type, HOST_WIDE_INT words, bool named)
7525 int int_nregs, sse_nregs;
7527 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
7528 if (!named && (VALID_AVX512F_REG_MODE (mode)
7529 || VALID_AVX256_REG_MODE (mode)))
7530 return 0;
7532 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7533 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7535 cum->nregs -= int_nregs;
7536 cum->sse_nregs -= sse_nregs;
7537 cum->regno += int_nregs;
7538 cum->sse_regno += sse_nregs;
7539 return int_nregs;
7541 else
7543 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7544 cum->words = (cum->words + align - 1) & ~(align - 1);
7545 cum->words += words;
7546 return 0;
7550 static int
7551 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7552 HOST_WIDE_INT words)
7554 /* Otherwise, this should be passed indirect. */
7555 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7557 cum->words += words;
7558 if (cum->nregs > 0)
7560 cum->nregs -= 1;
7561 cum->regno += 1;
7562 return 1;
7564 return 0;
7567 /* Update the data in CUM to advance over an argument of mode MODE and
7568 data type TYPE. (TYPE is null for libcalls where that information
7569 may not be available.) */
7571 static void
7572 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7573 const_tree type, bool named)
7575 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7576 HOST_WIDE_INT bytes, words;
7577 int nregs;
7579 if (mode == BLKmode)
7580 bytes = int_size_in_bytes (type);
7581 else
7582 bytes = GET_MODE_SIZE (mode);
7583 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7585 if (type)
7586 mode = type_natural_mode (type, NULL, false);
7588 if ((type && POINTER_BOUNDS_TYPE_P (type))
7589 || POINTER_BOUNDS_MODE_P (mode))
7591 /* If we pass bounds in the Bounds Table, just update the remaining bounds count. */
7592 if (cum->bnds_in_bt)
7594 cum->bnds_in_bt--;
7595 return;
7598 /* Update the remaining number of bounds to force. */
7599 if (cum->force_bnd_pass)
7600 cum->force_bnd_pass--;
7602 cum->bnd_regno++;
7604 return;
7607 /* The first arg not going to Bounds Tables resets this counter. */
7608 cum->bnds_in_bt = 0;
7609 /* For unnamed args we always pass bounds, to avoid a bounds mess when
7610 the passed and received types do not match. If bounds do not follow an
7611 unnamed arg, still pretend the required number of bounds were passed. */
7612 if (cum->force_bnd_pass)
7614 cum->bnd_regno += cum->force_bnd_pass;
7615 cum->force_bnd_pass = 0;
7618 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7619 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7620 else if (TARGET_64BIT)
7621 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7622 else
7623 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7625 /* For stdarg we expect bounds to be passed for each value passed
7626 in register. */
7627 if (cum->stdarg)
7628 cum->force_bnd_pass = nregs;
7629 /* For pointers passed in memory we expect bounds passed in Bounds
7630 Table. */
7631 if (!nregs)
7632 cum->bnds_in_bt = chkp_type_bounds_count (type);
7635 /* Define where to put the arguments to a function.
7636 Value is zero to push the argument on the stack,
7637 or a hard register in which to store the argument.
7639 MODE is the argument's machine mode.
7640 TYPE is the data type of the argument (as a tree).
7641 This is null for libcalls where that information may
7642 not be available.
7643 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7644 the preceding args and about the function being called.
7645 NAMED is nonzero if this argument is a named parameter
7646 (otherwise it is an extra parameter matching an ellipsis). */
7648 static rtx
7649 function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7650 machine_mode orig_mode, const_tree type,
7651 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7653 /* Avoid the AL settings for the Unix64 ABI. */
7654 if (mode == VOIDmode)
7655 return constm1_rtx;
7657 switch (mode)
7659 default:
7660 break;
7662 case BLKmode:
7663 if (bytes < 0)
7664 break;
7665 /* FALLTHRU */
7666 case DImode:
7667 case SImode:
7668 case HImode:
7669 case QImode:
7670 if (words <= cum->nregs)
7672 int regno = cum->regno;
7674 /* Fastcall allocates the first two DWORD (SImode) or
7675 smaller arguments to ECX and EDX if the argument
7676 isn't an aggregate type. */
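/* E.g. with __attribute__ ((fastcall)), int f (int a, int b, int c)
   receives A in %ecx, B in %edx and C on the stack.  */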
7677 if (cum->fastcall)
7679 if (mode == BLKmode
7680 || mode == DImode
7681 || (type && AGGREGATE_TYPE_P (type)))
7682 break;
7684 /* ECX, not EAX, is the first allocated register. */
7685 if (regno == AX_REG)
7686 regno = CX_REG;
7688 return gen_rtx_REG (mode, regno);
7690 break;
7692 case DFmode:
7693 if (cum->float_in_sse < 2)
7694 break;
7695 case SFmode:
7696 if (cum->float_in_sse < 1)
7697 break;
7698 /* FALLTHRU */
7699 case TImode:
7700 /* In 32bit, we pass TImode in xmm registers. */
7701 case V16QImode:
7702 case V8HImode:
7703 case V4SImode:
7704 case V2DImode:
7705 case V4SFmode:
7706 case V2DFmode:
7707 if (!type || !AGGREGATE_TYPE_P (type))
7709 if (cum->sse_nregs)
7710 return gen_reg_or_parallel (mode, orig_mode,
7711 cum->sse_regno + FIRST_SSE_REG);
7713 break;
7715 case OImode:
7716 case XImode:
7717 /* OImode and XImode shouldn't be used directly. */
7718 gcc_unreachable ();
7720 case V64QImode:
7721 case V32HImode:
7722 case V16SImode:
7723 case V8DImode:
7724 case V16SFmode:
7725 case V8DFmode:
7726 case V8SFmode:
7727 case V8SImode:
7728 case V32QImode:
7729 case V16HImode:
7730 case V4DFmode:
7731 case V4DImode:
7732 if (!type || !AGGREGATE_TYPE_P (type))
7734 if (cum->sse_nregs)
7735 return gen_reg_or_parallel (mode, orig_mode,
7736 cum->sse_regno + FIRST_SSE_REG);
7738 break;
7740 case V8QImode:
7741 case V4HImode:
7742 case V2SImode:
7743 case V2SFmode:
7744 case V1TImode:
7745 case V1DImode:
7746 if (!type || !AGGREGATE_TYPE_P (type))
7748 if (cum->mmx_nregs)
7749 return gen_reg_or_parallel (mode, orig_mode,
7750 cum->mmx_regno + FIRST_MMX_REG);
7752 break;
7755 return NULL_RTX;
7758 static rtx
7759 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7760 machine_mode orig_mode, const_tree type, bool named)
7762 /* Handle a hidden AL argument containing number of registers
7763 for varargs x86-64 functions. */
7764 if (mode == VOIDmode)
7765 return GEN_INT (cum->maybe_vaarg
7766 ? (cum->sse_nregs < 0
7767 ? X86_64_SSE_REGPARM_MAX
7768 : cum->sse_regno)
7769 : -1);
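/* E.g. for a call to printf ("%g\n", x) with X a double, the caller ends
   up loading %al with 1, the number of SSE registers used by the variable
   arguments.  */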
7771 switch (mode)
7773 default:
7774 break;
7776 case V8SFmode:
7777 case V8SImode:
7778 case V32QImode:
7779 case V16HImode:
7780 case V4DFmode:
7781 case V4DImode:
7782 case V16SFmode:
7783 case V16SImode:
7784 case V64QImode:
7785 case V32HImode:
7786 case V8DFmode:
7787 case V8DImode:
7788 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7789 if (!named)
7790 return NULL;
7791 break;
7794 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7795 cum->sse_nregs,
7796 &x86_64_int_parameter_registers [cum->regno],
7797 cum->sse_regno);
7800 static rtx
7801 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7802 machine_mode orig_mode, bool named,
7803 HOST_WIDE_INT bytes)
7805 unsigned int regno;
7807 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7808 We use the value -2 to specify that the current function call is MS_ABI. */
7809 if (mode == VOIDmode)
7810 return GEN_INT (-2);
7812 /* If we've run out of registers, it goes on the stack. */
7813 if (cum->nregs == 0)
7814 return NULL_RTX;
7816 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7818 /* Only floating point modes are passed in anything but integer regs. */
7819 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7821 if (named)
7822 regno = cum->regno + FIRST_SSE_REG;
7823 else
7825 rtx t1, t2;
7827 /* Unnamed floating parameters are passed in both the
7828 SSE and integer registers. */
7829 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7830 t2 = gen_rtx_REG (mode, regno);
7831 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7832 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7833 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7836 /* Handle aggregate types passed in registers. */
7837 if (orig_mode == BLKmode)
7839 if (bytes > 0 && bytes <= 8)
7840 mode = (bytes > 4 ? DImode : SImode);
7841 if (mode == BLKmode)
7842 mode = DImode;
7845 return gen_reg_or_parallel (mode, orig_mode, regno);
7848 /* Return where to put the arguments to a function.
7849 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7851 MODE is the argument's machine mode. TYPE is the data type of the
7852 argument. It is null for libcalls where that information may not be
7853 available. CUM gives information about the preceding args and about
7854 the function being called. NAMED is nonzero if this argument is a
7855 named parameter (otherwise it is an extra parameter matching an
7856 ellipsis). */
7858 static rtx
7859 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7860 const_tree type, bool named)
7862 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7863 machine_mode mode = omode;
7864 HOST_WIDE_INT bytes, words;
7865 rtx arg;
7867 /* All pointer bounds arguments are handled separately here. */
7868 if ((type && POINTER_BOUNDS_TYPE_P (type))
7869 || POINTER_BOUNDS_MODE_P (mode))
7871 /* Return NULL if bounds are forced to go in Bounds Table. */
7872 if (cum->bnds_in_bt)
7873 arg = NULL;
7874 /* Return the next available bound reg if any. */
7875 else if (cum->bnd_regno <= LAST_BND_REG)
7876 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7877 /* Return the next special slot number otherwise. */
7878 else
7879 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7881 return arg;
7884 if (mode == BLKmode)
7885 bytes = int_size_in_bytes (type);
7886 else
7887 bytes = GET_MODE_SIZE (mode);
7888 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7890 /* To simplify the code below, represent vector types with a vector mode
7891 even if MMX/SSE are not active. */
7892 if (type && TREE_CODE (type) == VECTOR_TYPE)
7893 mode = type_natural_mode (type, cum, false);
7895 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7896 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7897 else if (TARGET_64BIT)
7898 arg = function_arg_64 (cum, mode, omode, type, named);
7899 else
7900 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7902 return arg;
7905 /* A C expression that indicates when an argument must be passed by
7906 reference. If nonzero for an argument, a copy of that argument is
7907 made in memory and a pointer to the argument is passed instead of
7908 the argument itself. The pointer is passed in whatever way is
7909 appropriate for passing a pointer to that type. */
7911 static bool
7912 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7913 const_tree type, bool)
7915 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7917 /* Bounds are never passed by reference. */
7918 if ((type && POINTER_BOUNDS_TYPE_P (type))
7919 || POINTER_BOUNDS_MODE_P (mode))
7920 return false;
7922 /* See Windows x64 Software Convention. */
7923 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7925 int msize = (int) GET_MODE_SIZE (mode);
7926 if (type)
7928 /* Arrays are passed by reference. */
7929 if (TREE_CODE (type) == ARRAY_TYPE)
7930 return true;
7932 if (AGGREGATE_TYPE_P (type))
7934 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7935 are passed by reference. */
7936 msize = int_size_in_bytes (type);
7940 /* __m128 is passed by reference. */
7941 switch (msize) {
7942 case 1: case 2: case 4: case 8:
7943 break;
7944 default:
7945 return true;
7948 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7949 return 1;
7951 return 0;
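/* E.g. under the MS ABI a struct of size 3 or 16 bytes is passed by
   reference (as is __m128), while a struct of size 1, 2, 4 or 8 bytes
   is passed by value.  */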
7954 /* Return true when TYPE should be 128bit aligned for 32bit argument
7955 passing ABI. XXX: This function is obsolete and is only used for
7956 checking psABI compatibility with previous versions of GCC. */
7958 static bool
7959 ix86_compat_aligned_value_p (const_tree type)
7961 machine_mode mode = TYPE_MODE (type);
7962 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7963 || mode == TDmode
7964 || mode == TFmode
7965 || mode == TCmode)
7966 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7967 return true;
7968 if (TYPE_ALIGN (type) < 128)
7969 return false;
7971 if (AGGREGATE_TYPE_P (type))
7973 /* Walk the aggregates recursively. */
7974 switch (TREE_CODE (type))
7976 case RECORD_TYPE:
7977 case UNION_TYPE:
7978 case QUAL_UNION_TYPE:
7980 tree field;
7982 /* Walk all the structure fields. */
7983 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7985 if (TREE_CODE (field) == FIELD_DECL
7986 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7987 return true;
7989 break;
7992 case ARRAY_TYPE:
7993 /* Just in case some language passes arrays by value. */
7994 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7995 return true;
7996 break;
7998 default:
7999 gcc_unreachable ();
8002 return false;
8005 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8006 XXX: This function is obsolete and is only used for checking psABI
8007 compatibility with previous versions of GCC. */
8009 static unsigned int
8010 ix86_compat_function_arg_boundary (machine_mode mode,
8011 const_tree type, unsigned int align)
8013 /* In 32bit, only _Decimal128 and __float128 are aligned to their
8014 natural boundaries. */
8015 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8017 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
8018 make an exception for SSE modes since these require 128bit
8019 alignment.
8021 The handling here differs from field_alignment. ICC aligns MMX
8022 arguments to 4 byte boundaries, while structure fields are aligned
8023 to 8 byte boundaries. */
8024 if (!type)
8026 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8027 align = PARM_BOUNDARY;
8029 else
8031 if (!ix86_compat_aligned_value_p (type))
8032 align = PARM_BOUNDARY;
8035 if (align > BIGGEST_ALIGNMENT)
8036 align = BIGGEST_ALIGNMENT;
8037 return align;
8040 /* Return true when TYPE should be 128bit aligned for 32bit argument
8041 passing ABI. */
8043 static bool
8044 ix86_contains_aligned_value_p (const_tree type)
8046 machine_mode mode = TYPE_MODE (type);
8048 if (mode == XFmode || mode == XCmode)
8049 return false;
8051 if (TYPE_ALIGN (type) < 128)
8052 return false;
8054 if (AGGREGATE_TYPE_P (type))
8056 /* Walk the aggregates recursively. */
8057 switch (TREE_CODE (type))
8059 case RECORD_TYPE:
8060 case UNION_TYPE:
8061 case QUAL_UNION_TYPE:
8063 tree field;
8065 /* Walk all the structure fields. */
8066 for (field = TYPE_FIELDS (type);
8067 field;
8068 field = DECL_CHAIN (field))
8070 if (TREE_CODE (field) == FIELD_DECL
8071 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8072 return true;
8074 break;
8077 case ARRAY_TYPE:
8078 /* Just in case some language passes arrays by value. */
8079 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8080 return true;
8081 break;
8083 default:
8084 gcc_unreachable ();
8087 else
8088 return TYPE_ALIGN (type) >= 128;
8090 return false;
8093 /* Gives the alignment boundary, in bits, of an argument with the
8094 specified mode and type. */
8096 static unsigned int
8097 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8099 unsigned int align;
8100 if (type)
8102 /* Since the main variant type is used for the call, convert TYPE
8103 to its main variant. */
8104 type = TYPE_MAIN_VARIANT (type);
8105 align = TYPE_ALIGN (type);
8107 else
8108 align = GET_MODE_ALIGNMENT (mode);
8109 if (align < PARM_BOUNDARY)
8110 align = PARM_BOUNDARY;
8111 else
8113 static bool warned;
8114 unsigned int saved_align = align;
8116 if (!TARGET_64BIT)
8118 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8119 if (!type)
8121 if (mode == XFmode || mode == XCmode)
8122 align = PARM_BOUNDARY;
8124 else if (!ix86_contains_aligned_value_p (type))
8125 align = PARM_BOUNDARY;
8127 if (align < 128)
8128 align = PARM_BOUNDARY;
8131 if (warn_psabi
8132 && !warned
8133 && align != ix86_compat_function_arg_boundary (mode, type,
8134 saved_align))
8136 warned = true;
8137 inform (input_location,
8138 "The ABI for passing parameters with %d-byte"
8139 " alignment has changed in GCC 4.6",
8140 align / BITS_PER_UNIT);
8144 return align;
8147 /* Return true if N is a possible register number of function value. */
8149 static bool
8150 ix86_function_value_regno_p (const unsigned int regno)
8152 switch (regno)
8154 case AX_REG:
8155 return true;
8156 case DX_REG:
8157 return (!TARGET_64BIT || ix86_abi != MS_ABI);
8158 case DI_REG:
8159 case SI_REG:
8160 return TARGET_64BIT && ix86_abi != MS_ABI;
8162 case FIRST_BND_REG:
8163 return chkp_function_instrumented_p (current_function_decl);
8165 /* Complex values are returned in %st(0)/%st(1) pair. */
8166 case ST0_REG:
8167 case ST1_REG:
8168 /* TODO: The function should depend on current function ABI but
8169 builtins.c would need updating then. Therefore we use the
8170 default ABI. */
8171 if (TARGET_64BIT && ix86_abi == MS_ABI)
8172 return false;
8173 return TARGET_FLOAT_RETURNS_IN_80387;
8175 /* Complex values are returned in %xmm0/%xmm1 pair. */
8176 case XMM0_REG:
8177 case XMM1_REG:
8178 return TARGET_SSE;
8180 case MM0_REG:
8181 if (TARGET_MACHO || TARGET_64BIT)
8182 return false;
8183 return TARGET_MMX;
8186 return false;
8189 /* Define how to find the value returned by a function.
8190 VALTYPE is the data type of the value (as a tree).
8191 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8192 otherwise, FUNC is 0. */
8194 static rtx
8195 function_value_32 (machine_mode orig_mode, machine_mode mode,
8196 const_tree fntype, const_tree fn)
8198 unsigned int regno;
8200 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8201 we normally prevent this case when mmx is not available. However
8202 some ABIs may require the result to be returned like DImode. */
8203 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8204 regno = FIRST_MMX_REG;
8206 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8207 we prevent this case when sse is not available. However some ABIs
8208 may require the result to be returned like integer TImode. */
8209 else if (mode == TImode
8210 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8211 regno = FIRST_SSE_REG;
8213 /* 32-byte vector modes in %ymm0. */
8214 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8215 regno = FIRST_SSE_REG;
8217 /* 64-byte vector modes in %zmm0. */
8218 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8219 regno = FIRST_SSE_REG;
8221 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8222 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8223 regno = FIRST_FLOAT_REG;
8224 else
8225 /* Most things go in %eax. */
8226 regno = AX_REG;
8228 /* Override FP return register with %xmm0 for local functions when
8229 SSE math is enabled or for functions with sseregparm attribute. */
8230 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8232 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8233 if ((sse_level >= 1 && mode == SFmode)
8234 || (sse_level == 2 && mode == DFmode))
8235 regno = FIRST_SSE_REG;
8238 /* OImode shouldn't be used directly. */
8239 gcc_assert (mode != OImode);
8241 return gen_rtx_REG (orig_mode, regno);
8244 static rtx
8245 function_value_64 (machine_mode orig_mode, machine_mode mode,
8246 const_tree valtype)
8248 rtx ret;
8250 /* Handle libcalls, which don't provide a type node. */
8251 if (valtype == NULL)
8253 unsigned int regno;
8255 switch (mode)
8257 case SFmode:
8258 case SCmode:
8259 case DFmode:
8260 case DCmode:
8261 case TFmode:
8262 case SDmode:
8263 case DDmode:
8264 case TDmode:
8265 regno = FIRST_SSE_REG;
8266 break;
8267 case XFmode:
8268 case XCmode:
8269 regno = FIRST_FLOAT_REG;
8270 break;
8271 case TCmode:
8272 return NULL;
8273 default:
8274 regno = AX_REG;
8277 return gen_rtx_REG (mode, regno);
8279 else if (POINTER_TYPE_P (valtype))
8281 /* Pointers are always returned in word_mode. */
8282 mode = word_mode;
8285 ret = construct_container (mode, orig_mode, valtype, 1,
8286 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8287 x86_64_int_return_registers, 0);
8289 /* For zero sized structures, construct_container returns NULL, but we
8290 need to keep the rest of the compiler happy by returning a meaningful value. */
8291 if (!ret)
8292 ret = gen_rtx_REG (orig_mode, AX_REG);
8294 return ret;
8297 static rtx
8298 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8299 const_tree valtype)
8301 unsigned int regno = AX_REG;
8303 if (TARGET_SSE)
8305 switch (GET_MODE_SIZE (mode))
8307 case 16:
8308 if (valtype != NULL_TREE
8309 && !VECTOR_INTEGER_TYPE_P (valtype)
8311 && !INTEGRAL_TYPE_P (valtype)
8312 && !VECTOR_FLOAT_TYPE_P (valtype))
8313 break;
8314 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8315 && !COMPLEX_MODE_P (mode))
8316 regno = FIRST_SSE_REG;
8317 break;
8318 case 8:
8319 case 4:
8320 if (mode == SFmode || mode == DFmode)
8321 regno = FIRST_SSE_REG;
8322 break;
8323 default:
8324 break;
8327 return gen_rtx_REG (orig_mode, regno);
8330 static rtx
8331 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8332 machine_mode orig_mode, machine_mode mode)
8334 const_tree fn, fntype;
8336 fn = NULL_TREE;
8337 if (fntype_or_decl && DECL_P (fntype_or_decl))
8338 fn = fntype_or_decl;
8339 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8341 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8342 || POINTER_BOUNDS_MODE_P (mode))
8343 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8344 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8345 return function_value_ms_64 (orig_mode, mode, valtype);
8346 else if (TARGET_64BIT)
8347 return function_value_64 (orig_mode, mode, valtype);
8348 else
8349 return function_value_32 (orig_mode, mode, fntype, fn);
8352 static rtx
8353 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8355 machine_mode mode, orig_mode;
8357 orig_mode = TYPE_MODE (valtype);
8358 mode = type_natural_mode (valtype, NULL, true);
8359 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8362 /* Return an RTX representing a place where a function returns
8363 or receives pointer bounds, or NULL if no bounds are returned.
8365 VALTYPE is a data type of a value returned by the function.
8367 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8368 or FUNCTION_TYPE of the function.
8370 If OUTGOING is false, return a place in which the caller will
8371 see the return value. Otherwise, return a place where a
8372 function returns a value. */
8374 static rtx
8375 ix86_function_value_bounds (const_tree valtype,
8376 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8377 bool outgoing ATTRIBUTE_UNUSED)
8379 rtx res = NULL_RTX;
8381 if (BOUNDED_TYPE_P (valtype))
8382 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8383 else if (chkp_type_has_pointer (valtype))
8385 bitmap slots;
8386 rtx bounds[2];
8387 bitmap_iterator bi;
8388 unsigned i, bnd_no = 0;
8390 bitmap_obstack_initialize (NULL);
8391 slots = BITMAP_ALLOC (NULL);
8392 chkp_find_bound_slots (valtype, slots);
8394 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8396 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8397 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8398 gcc_assert (bnd_no < 2);
8399 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8402 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8404 BITMAP_FREE (slots);
8405 bitmap_obstack_release (NULL);
8407 else
8408 res = NULL_RTX;
8410 return res;
8413 /* Pointer function arguments and return values are promoted to
8414 word_mode. */
8416 static machine_mode
8417 ix86_promote_function_mode (const_tree type, machine_mode mode,
8418 int *punsignedp, const_tree fntype,
8419 int for_return)
8421 if (type != NULL_TREE && POINTER_TYPE_P (type))
8423 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8424 return word_mode;
8426 return default_promote_function_mode (type, mode, punsignedp, fntype,
8427 for_return);
8430 /* Return true if a structure, union or array with MODE containing FIELD
8431 should be accessed using BLKmode. */
8433 static bool
8434 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8436 /* Union with XFmode must be in BLKmode. */
8437 return (mode == XFmode
8438 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8439 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8443 ix86_libcall_value (machine_mode mode)
8445 return ix86_function_value_1 (NULL, NULL, mode, mode);
8448 /* Return true iff type is returned in memory. */
8450 static bool
8451 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8453 #ifdef SUBTARGET_RETURN_IN_MEMORY
8454 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8455 #else
8456 const machine_mode mode = type_natural_mode (type, NULL, true);
8457 HOST_WIDE_INT size;
8459 if (POINTER_BOUNDS_TYPE_P (type))
8460 return false;
8462 if (TARGET_64BIT)
8464 if (ix86_function_type_abi (fntype) == MS_ABI)
8466 size = int_size_in_bytes (type);
8468 /* __m128 is returned in xmm0. */
8469 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8470 || INTEGRAL_TYPE_P (type)
8471 || VECTOR_FLOAT_TYPE_P (type))
8472 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8473 && !COMPLEX_MODE_P (mode)
8474 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8475 return false;
8477 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8478 return size != 1 && size != 2 && size != 4 && size != 8;
8480 else
8482 int needed_intregs, needed_sseregs;
8484 return examine_argument (mode, type, 1,
8485 &needed_intregs, &needed_sseregs);
8488 else
8490 if (mode == BLKmode)
8491 return true;
8493 size = int_size_in_bytes (type);
8495 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8496 return false;
8498 if (VECTOR_MODE_P (mode) || mode == TImode)
8500 /* User-created vectors small enough to fit in EAX. */
8501 if (size < 8)
8502 return false;
8504 /* Unless the ABI prescribes otherwise,
8505 MMX/3dNow values are returned in MM0 if available. */
8507 if (size == 8)
8508 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8510 /* SSE values are returned in XMM0 if available. */
8511 if (size == 16)
8512 return !TARGET_SSE;
8514 /* AVX values are returned in YMM0 if available. */
8515 if (size == 32)
8516 return !TARGET_AVX;
8518 /* AVX512F values are returned in ZMM0 if available. */
8519 if (size == 64)
8520 return !TARGET_AVX512F;
8523 if (mode == XFmode)
8524 return false;
8526 if (size > 12)
8527 return true;
8529 /* OImode shouldn't be used directly. */
8530 gcc_assert (mode != OImode);
8532 return false;
8534 #endif
8538 /* Create the va_list data type. */
8540 /* Returns the calling-convention-specific va_list data type.
8541 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8543 static tree
8544 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8546 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8548 /* For i386 we use a plain pointer to the argument area. */
8549 if (!TARGET_64BIT || abi == MS_ABI)
8550 return build_pointer_type (char_type_node);
8552 record = lang_hooks.types.make_type (RECORD_TYPE);
8553 type_decl = build_decl (BUILTINS_LOCATION,
8554 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8556 f_gpr = build_decl (BUILTINS_LOCATION,
8557 FIELD_DECL, get_identifier ("gp_offset"),
8558 unsigned_type_node);
8559 f_fpr = build_decl (BUILTINS_LOCATION,
8560 FIELD_DECL, get_identifier ("fp_offset"),
8561 unsigned_type_node);
8562 f_ovf = build_decl (BUILTINS_LOCATION,
8563 FIELD_DECL, get_identifier ("overflow_arg_area"),
8564 ptr_type_node);
8565 f_sav = build_decl (BUILTINS_LOCATION,
8566 FIELD_DECL, get_identifier ("reg_save_area"),
8567 ptr_type_node);
8569 va_list_gpr_counter_field = f_gpr;
8570 va_list_fpr_counter_field = f_fpr;
8572 DECL_FIELD_CONTEXT (f_gpr) = record;
8573 DECL_FIELD_CONTEXT (f_fpr) = record;
8574 DECL_FIELD_CONTEXT (f_ovf) = record;
8575 DECL_FIELD_CONTEXT (f_sav) = record;
8577 TYPE_STUB_DECL (record) = type_decl;
8578 TYPE_NAME (record) = type_decl;
8579 TYPE_FIELDS (record) = f_gpr;
8580 DECL_CHAIN (f_gpr) = f_fpr;
8581 DECL_CHAIN (f_fpr) = f_ovf;
8582 DECL_CHAIN (f_ovf) = f_sav;
8584 layout_type (record);
8586 /* The correct type is an array type of one element. */
8587 return build_array_type (record, build_index_type (size_zero_node));
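/* For reference, a sketch of the user-visible shape the record built above
   corresponds to (the SysV x86-64 va_list):

     typedef struct __va_list_tag {
       unsigned int gp_offset;        // byte offset into reg_save_area for GPRs
       unsigned int fp_offset;        // byte offset into reg_save_area for SSE regs
       void *overflow_arg_area;       // next stack-passed (overflow) argument
       void *reg_save_area;           // register save area laid out in the prologue
     } va_list[1];                    // note the one-element array type
*/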
8590 /* Set up the builtin va_list data type and, for 64-bit, the additional
8591 calling convention specific va_list data types. */
8593 static tree
8594 ix86_build_builtin_va_list (void)
8596 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8598 /* Initialize ABI-specific va_list builtin types. */
8599 if (TARGET_64BIT)
8601 tree t;
8602 if (ix86_abi == MS_ABI)
8604 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8605 if (TREE_CODE (t) != RECORD_TYPE)
8606 t = build_variant_type_copy (t);
8607 sysv_va_list_type_node = t;
8609 else
8611 t = ret;
8612 if (TREE_CODE (t) != RECORD_TYPE)
8613 t = build_variant_type_copy (t);
8614 sysv_va_list_type_node = t;
8616 if (ix86_abi != MS_ABI)
8618 t = ix86_build_builtin_va_list_abi (MS_ABI);
8619 if (TREE_CODE (t) != RECORD_TYPE)
8620 t = build_variant_type_copy (t);
8621 ms_va_list_type_node = t;
8623 else
8625 t = ret;
8626 if (TREE_CODE (t) != RECORD_TYPE)
8627 t = build_variant_type_copy (t);
8628 ms_va_list_type_node = t;
8632 return ret;
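/* Usage note (an inference from the code above, not original text): since
   both sysv_va_list_type_node and ms_va_list_type_node are initialized on
   64-bit targets, __builtin_ms_va_list and __builtin_sysv_va_list remain
   usable from functions with the non-default calling convention, e.g. an
   __attribute__((ms_abi)) function in a SysV-default compilation.  */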
8635 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8637 static void
8638 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8640 rtx save_area, mem;
8641 alias_set_type set;
8642 int i, max;
8644 /* GPR size of varargs save area. */
8645 if (cfun->va_list_gpr_size)
8646 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8647 else
8648 ix86_varargs_gpr_size = 0;
8650 /* FPR size of varargs save area. We don't need it if we don't pass
8651 anything in SSE registers. */
8652 if (TARGET_SSE && cfun->va_list_fpr_size)
8653 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8654 else
8655 ix86_varargs_fpr_size = 0;
8657 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8658 return;
8660 save_area = frame_pointer_rtx;
8661 set = get_varargs_alias_set ();
8663 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8664 if (max > X86_64_REGPARM_MAX)
8665 max = X86_64_REGPARM_MAX;
8667 for (i = cum->regno; i < max; i++)
8669 mem = gen_rtx_MEM (word_mode,
8670 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8671 MEM_NOTRAP_P (mem) = 1;
8672 set_mem_alias_set (mem, set);
8673 emit_move_insn (mem,
8674 gen_rtx_REG (word_mode,
8675 x86_64_int_parameter_registers[i]));
8678 if (ix86_varargs_fpr_size)
8680 machine_mode smode;
8681 rtx_code_label *label;
8682 rtx test;
8684 /* Now emit code to save SSE registers. The AX parameter contains the
8685 number of SSE parameter registers used to call this function, though all
8686 we actually check here is the zero/non-zero status. */
8688 label = gen_label_rtx ();
8689 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8690 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8691 label));
8693 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8694 we used movdqa (i.e. TImode) instead? Perhaps even better would
8695 be if we could determine the real mode of the data, via a hook
8696 into pass_stdarg. Ignore all that for now. */
8697 smode = V4SFmode;
8698 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8699 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8701 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8702 if (max > X86_64_SSE_REGPARM_MAX)
8703 max = X86_64_SSE_REGPARM_MAX;
8705 for (i = cum->sse_regno; i < max; ++i)
8707 mem = plus_constant (Pmode, save_area,
8708 i * 16 + ix86_varargs_gpr_size);
8709 mem = gen_rtx_MEM (smode, mem);
8710 MEM_NOTRAP_P (mem) = 1;
8711 set_mem_alias_set (mem, set);
8712 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8714 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8717 emit_label (label);
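/* Layout sketch of the register save area filled in above, in the common
   case where both parts are needed (offsets relative to reg_save_area):

     0   .. 47    up to six integer argument registers, 8 bytes each
     48  .. 175   up to eight SSE argument registers, 16 bytes each

   gp_offset and fp_offset in the va_list index into this block; see
   ix86_va_start below.  */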
8721 static void
8722 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8724 alias_set_type set = get_varargs_alias_set ();
8725 int i;
8727 /* Reset to zero, as a SysV va_arg may have been used
8728 before. */
8729 ix86_varargs_gpr_size = 0;
8730 ix86_varargs_fpr_size = 0;
8732 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8734 rtx reg, mem;
8736 mem = gen_rtx_MEM (Pmode,
8737 plus_constant (Pmode, virtual_incoming_args_rtx,
8738 i * UNITS_PER_WORD));
8739 MEM_NOTRAP_P (mem) = 1;
8740 set_mem_alias_set (mem, set);
8742 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8743 emit_move_insn (mem, reg);
8747 static void
8748 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8749 tree type, int *, int no_rtl)
8751 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8752 CUMULATIVE_ARGS next_cum;
8753 tree fntype;
8755 /* This argument doesn't appear to be used anymore. Which is good,
8756 because the old code here didn't suppress rtl generation. */
8757 gcc_assert (!no_rtl);
8759 if (!TARGET_64BIT)
8760 return;
8762 fntype = TREE_TYPE (current_function_decl);
8764 /* For varargs, we do not want to skip the dummy va_dcl argument.
8765 For stdargs, we do want to skip the last named argument. */
8766 next_cum = *cum;
8767 if (stdarg_p (fntype))
8768 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8769 true);
8771 if (cum->call_abi == MS_ABI)
8772 setup_incoming_varargs_ms_64 (&next_cum);
8773 else
8774 setup_incoming_varargs_64 (&next_cum);
8777 static void
8778 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8779 enum machine_mode mode,
8780 tree type,
8781 int *pretend_size ATTRIBUTE_UNUSED,
8782 int no_rtl)
8784 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8785 CUMULATIVE_ARGS next_cum;
8786 tree fntype;
8787 rtx save_area;
8788 int bnd_reg, i, max;
8790 gcc_assert (!no_rtl);
8792 /* Do nothing if we use a plain pointer to the argument area. */
8793 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8794 return;
8796 fntype = TREE_TYPE (current_function_decl);
8798 /* For varargs, we do not want to skip the dummy va_dcl argument.
8799 For stdargs, we do want to skip the last named argument. */
8800 next_cum = *cum;
8801 if (stdarg_p (fntype))
8802 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8803 true);
8804 save_area = frame_pointer_rtx;
8806 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8807 if (max > X86_64_REGPARM_MAX)
8808 max = X86_64_REGPARM_MAX;
8810 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8811 if (chkp_function_instrumented_p (current_function_decl))
8812 for (i = cum->regno; i < max; i++)
8814 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8815 rtx reg = gen_rtx_REG (DImode,
8816 x86_64_int_parameter_registers[i]);
8817 rtx ptr = reg;
8818 rtx bounds;
8820 if (bnd_reg <= LAST_BND_REG)
8821 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8822 else
8824 rtx ldx_addr =
8825 plus_constant (Pmode, arg_pointer_rtx,
8826 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8827 bounds = gen_reg_rtx (BNDmode);
8828 emit_insn (BNDmode == BND64mode
8829 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8830 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8833 emit_insn (BNDmode == BND64mode
8834 ? gen_bnd64_stx (addr, ptr, bounds)
8835 : gen_bnd32_stx (addr, ptr, bounds));
8837 bnd_reg++;
8842 /* Check whether TYPE is a va_list that is a plain char pointer. */
8844 static bool
8845 is_va_list_char_pointer (tree type)
8847 tree canonic;
8849 /* For 32-bit it is always true. */
8850 if (!TARGET_64BIT)
8851 return true;
8852 canonic = ix86_canonical_va_list_type (type);
8853 return (canonic == ms_va_list_type_node
8854 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8857 /* Implement va_start. */
8859 static void
8860 ix86_va_start (tree valist, rtx nextarg)
8862 HOST_WIDE_INT words, n_gpr, n_fpr;
8863 tree f_gpr, f_fpr, f_ovf, f_sav;
8864 tree gpr, fpr, ovf, sav, t;
8865 tree type;
8866 rtx ovf_rtx;
8868 if (flag_split_stack
8869 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8871 unsigned int scratch_regno;
8873 /* When we are splitting the stack, we can't refer to the stack
8874 arguments using internal_arg_pointer, because they may be on
8875 the old stack. The split stack prologue will arrange to
8876 leave a pointer to the old stack arguments in a scratch
8877 register, which we here copy to a pseudo-register. The split
8878 stack prologue can't set the pseudo-register directly because
8879 it (the prologue) runs before any registers have been saved. */
8881 scratch_regno = split_stack_prologue_scratch_regno ();
8882 if (scratch_regno != INVALID_REGNUM)
8884 rtx reg;
8885 rtx_insn *seq;
8887 reg = gen_reg_rtx (Pmode);
8888 cfun->machine->split_stack_varargs_pointer = reg;
8890 start_sequence ();
8891 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8892 seq = get_insns ();
8893 end_sequence ();
8895 push_topmost_sequence ();
8896 emit_insn_after (seq, entry_of_function ());
8897 pop_topmost_sequence ();
8901 /* Only the 64-bit target needs something special. */
8902 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8904 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8905 std_expand_builtin_va_start (valist, nextarg);
8906 else
8908 rtx va_r, next;
8910 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8911 next = expand_binop (ptr_mode, add_optab,
8912 cfun->machine->split_stack_varargs_pointer,
8913 crtl->args.arg_offset_rtx,
8914 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8915 convert_move (va_r, next, 0);
8917 /* Store zero bounds for va_list. */
8918 if (chkp_function_instrumented_p (current_function_decl))
8919 chkp_expand_bounds_reset_for_mem (valist,
8920 make_tree (TREE_TYPE (valist),
8921 next));
8924 return;
8927 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8928 f_fpr = DECL_CHAIN (f_gpr);
8929 f_ovf = DECL_CHAIN (f_fpr);
8930 f_sav = DECL_CHAIN (f_ovf);
8932 valist = build_simple_mem_ref (valist);
8933 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8934 /* The following should be folded into the MEM_REF offset. */
8935 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8936 f_gpr, NULL_TREE);
8937 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8938 f_fpr, NULL_TREE);
8939 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8940 f_ovf, NULL_TREE);
8941 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8942 f_sav, NULL_TREE);
8944 /* Count number of gp and fp argument registers used. */
8945 words = crtl->args.info.words;
8946 n_gpr = crtl->args.info.regno;
8947 n_fpr = crtl->args.info.sse_regno;
8949 if (cfun->va_list_gpr_size)
8951 type = TREE_TYPE (gpr);
8952 t = build2 (MODIFY_EXPR, type,
8953 gpr, build_int_cst (type, n_gpr * 8));
8954 TREE_SIDE_EFFECTS (t) = 1;
8955 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8958 if (TARGET_SSE && cfun->va_list_fpr_size)
8960 type = TREE_TYPE (fpr);
8961 t = build2 (MODIFY_EXPR, type, fpr,
8962 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8963 TREE_SIDE_EFFECTS (t) = 1;
8964 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8967 /* Find the overflow area. */
8968 type = TREE_TYPE (ovf);
8969 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8970 ovf_rtx = crtl->args.internal_arg_pointer;
8971 else
8972 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8973 t = make_tree (type, ovf_rtx);
8974 if (words != 0)
8975 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8977 /* Store zero bounds for overflow area pointer. */
8978 if (chkp_function_instrumented_p (current_function_decl))
8979 chkp_expand_bounds_reset_for_mem (ovf, t);
8981 t = build2 (MODIFY_EXPR, type, ovf, t);
8982 TREE_SIDE_EFFECTS (t) = 1;
8983 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8985 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8987 /* Find the register save area.
8988 The function prologue saves it right above the stack frame. */
8989 type = TREE_TYPE (sav);
8990 t = make_tree (type, frame_pointer_rtx);
8991 if (!ix86_varargs_gpr_size)
8992 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8994 /* Store zero bounds for save area pointer. */
8995 if (chkp_function_instrumented_p (current_function_decl))
8996 chkp_expand_bounds_reset_for_mem (sav, t);
8998 t = build2 (MODIFY_EXPR, type, sav, t);
8999 TREE_SIDE_EFFECTS (t) = 1;
9000 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9004 /* Implement va_arg. */
9006 static tree
9007 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9008 gimple_seq *post_p)
9010 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9011 tree f_gpr, f_fpr, f_ovf, f_sav;
9012 tree gpr, fpr, ovf, sav, t;
9013 int size, rsize;
9014 tree lab_false, lab_over = NULL_TREE;
9015 tree addr, t2;
9016 rtx container;
9017 int indirect_p = 0;
9018 tree ptrtype;
9019 machine_mode nat_mode;
9020 unsigned int arg_boundary;
9022 /* Only the 64-bit target needs something special. */
9023 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
9024 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
9026 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9027 f_fpr = DECL_CHAIN (f_gpr);
9028 f_ovf = DECL_CHAIN (f_fpr);
9029 f_sav = DECL_CHAIN (f_ovf);
9031 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9032 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
9033 valist = build_va_arg_indirect_ref (valist);
9034 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9035 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9036 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9038 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9039 if (indirect_p)
9040 type = build_pointer_type (type);
9041 size = int_size_in_bytes (type);
9042 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
9044 nat_mode = type_natural_mode (type, NULL, false);
9045 switch (nat_mode)
9047 case V8SFmode:
9048 case V8SImode:
9049 case V32QImode:
9050 case V16HImode:
9051 case V4DFmode:
9052 case V4DImode:
9053 case V16SFmode:
9054 case V16SImode:
9055 case V64QImode:
9056 case V32HImode:
9057 case V8DFmode:
9058 case V8DImode:
9059 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
9060 if (!TARGET_64BIT_MS_ABI)
9062 container = NULL;
9063 break;
9066 default:
9067 container = construct_container (nat_mode, TYPE_MODE (type),
9068 type, 0, X86_64_REGPARM_MAX,
9069 X86_64_SSE_REGPARM_MAX, intreg,
9071 break;
9074 /* Pull the value out of the saved registers. */
9076 addr = create_tmp_var (ptr_type_node, "addr");
9078 if (container)
9080 int needed_intregs, needed_sseregs;
9081 bool need_temp;
9082 tree int_addr, sse_addr;
9084 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9085 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9087 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9089 need_temp = (!REG_P (container)
9090 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9091 || TYPE_ALIGN (type) > 128));
9093 /* In case we are passing a structure, verify that it is a consecutive block
9094 in the register save area. If not, we need to do moves. */
9095 if (!need_temp && !REG_P (container))
9097 /* Verify that all registers are strictly consecutive. */
9098 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9100 int i;
9102 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9104 rtx slot = XVECEXP (container, 0, i);
9105 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9106 || INTVAL (XEXP (slot, 1)) != i * 16)
9107 need_temp = true;
9110 else
9112 int i;
9114 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9116 rtx slot = XVECEXP (container, 0, i);
9117 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9118 || INTVAL (XEXP (slot, 1)) != i * 8)
9119 need_temp = true;
9123 if (!need_temp)
9125 int_addr = addr;
9126 sse_addr = addr;
9128 else
9130 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9131 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9134 /* First ensure that we fit completely in registers. */
9135 if (needed_intregs)
9137 t = build_int_cst (TREE_TYPE (gpr),
9138 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9139 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9140 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9141 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9142 gimplify_and_add (t, pre_p);
9144 if (needed_sseregs)
9146 t = build_int_cst (TREE_TYPE (fpr),
9147 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9148 + X86_64_REGPARM_MAX * 8);
9149 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9150 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9151 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9152 gimplify_and_add (t, pre_p);
9155 /* Compute index to start of area used for integer regs. */
9156 if (needed_intregs)
9158 /* int_addr = gpr + sav; */
9159 t = fold_build_pointer_plus (sav, gpr);
9160 gimplify_assign (int_addr, t, pre_p);
9162 if (needed_sseregs)
9164 /* sse_addr = fpr + sav; */
9165 t = fold_build_pointer_plus (sav, fpr);
9166 gimplify_assign (sse_addr, t, pre_p);
9168 if (need_temp)
9170 int i, prev_size = 0;
9171 tree temp = create_tmp_var (type, "va_arg_tmp");
9173 /* addr = &temp; */
9174 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9175 gimplify_assign (addr, t, pre_p);
9177 for (i = 0; i < XVECLEN (container, 0); i++)
9179 rtx slot = XVECEXP (container, 0, i);
9180 rtx reg = XEXP (slot, 0);
9181 machine_mode mode = GET_MODE (reg);
9182 tree piece_type;
9183 tree addr_type;
9184 tree daddr_type;
9185 tree src_addr, src;
9186 int src_offset;
9187 tree dest_addr, dest;
9188 int cur_size = GET_MODE_SIZE (mode);
9190 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9191 prev_size = INTVAL (XEXP (slot, 1));
9192 if (prev_size + cur_size > size)
9194 cur_size = size - prev_size;
9195 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9196 if (mode == BLKmode)
9197 mode = QImode;
9199 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9200 if (mode == GET_MODE (reg))
9201 addr_type = build_pointer_type (piece_type);
9202 else
9203 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9204 true);
9205 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9206 true);
9208 if (SSE_REGNO_P (REGNO (reg)))
9210 src_addr = sse_addr;
9211 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9213 else
9215 src_addr = int_addr;
9216 src_offset = REGNO (reg) * 8;
9218 src_addr = fold_convert (addr_type, src_addr);
9219 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9221 dest_addr = fold_convert (daddr_type, addr);
9222 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9223 if (cur_size == GET_MODE_SIZE (mode))
9225 src = build_va_arg_indirect_ref (src_addr);
9226 dest = build_va_arg_indirect_ref (dest_addr);
9228 gimplify_assign (dest, src, pre_p);
9230 else
9232 tree copy
9233 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9234 3, dest_addr, src_addr,
9235 size_int (cur_size));
9236 gimplify_and_add (copy, pre_p);
9238 prev_size += cur_size;
9242 if (needed_intregs)
9244 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9245 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9246 gimplify_assign (gpr, t, pre_p);
9249 if (needed_sseregs)
9251 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9252 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9253 gimplify_assign (fpr, t, pre_p);
9256 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9258 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9261 /* ... otherwise out of the overflow area. */
9263 /* When we align a parameter on the stack for the caller, if the parameter
9264 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
9265 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
9266 here with the caller. */
9267 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9268 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9269 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9271 /* Care for on-stack alignment if needed. */
9272 if (arg_boundary <= 64 || size == 0)
9273 t = ovf;
9274 else
9276 HOST_WIDE_INT align = arg_boundary / 8;
9277 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9278 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9279 build_int_cst (TREE_TYPE (t), -align));
9282 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9283 gimplify_assign (addr, t, pre_p);
9285 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9286 gimplify_assign (unshare_expr (ovf), t, pre_p);
9288 if (container)
9289 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9291 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9292 addr = fold_convert (ptrtype, addr);
9294 if (indirect_p)
9295 addr = build_va_arg_indirect_ref (addr);
9296 return build_va_arg_indirect_ref (addr);
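/* Rough shape of the sequence gimplified above for a single integer
   argument under the 64-bit SysV ABI (a sketch, written as plain C):

     if (ap->gp_offset >= 48)                 // no GPR slot left
       goto lab_false;
     addr = ap->reg_save_area + ap->gp_offset;
     ap->gp_offset += 8;
     goto lab_over;
   lab_false:
     addr = ap->overflow_arg_area;            // plus alignment when needed
     ap->overflow_arg_area = addr + 8;
   lab_over:
     result = *(int *) addr;
*/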
9299 /* Return true if OPNUM's MEM should be matched
9300 in movabs* patterns. */
9302 bool
9303 ix86_check_movabs (rtx insn, int opnum)
9305 rtx set, mem;
9307 set = PATTERN (insn);
9308 if (GET_CODE (set) == PARALLEL)
9309 set = XVECEXP (set, 0, 0);
9310 gcc_assert (GET_CODE (set) == SET);
9311 mem = XEXP (set, opnum);
9312 while (GET_CODE (mem) == SUBREG)
9313 mem = SUBREG_REG (mem);
9314 gcc_assert (MEM_P (mem));
9315 return volatile_ok || !MEM_VOLATILE_P (mem);
9318 /* Initialize the table of extra 80387 mathematical constants. */
9320 static void
9321 init_ext_80387_constants (void)
9323 static const char * cst[5] =
9325 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9326 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9327 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9328 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9329 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9331 int i;
9333 for (i = 0; i < 5; i++)
9335 real_from_string (&ext_80387_constants_table[i], cst[i]);
9336 /* Ensure each constant is rounded to XFmode precision. */
9337 real_convert (&ext_80387_constants_table[i],
9338 XFmode, &ext_80387_constants_table[i]);
9341 ext_80387_constants_init = 1;
9344 /* Return non-zero if the constant is something that
9345 can be loaded with a special instruction. */
9348 standard_80387_constant_p (rtx x)
9350 machine_mode mode = GET_MODE (x);
9352 REAL_VALUE_TYPE r;
9354 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
9355 return -1;
9357 if (x == CONST0_RTX (mode))
9358 return 1;
9359 if (x == CONST1_RTX (mode))
9360 return 2;
9362 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9364 /* For XFmode constants, try to find a special 80387 instruction when
9365 optimizing for size or on those CPUs that benefit from them. */
9366 if (mode == XFmode
9367 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9369 int i;
9371 if (! ext_80387_constants_init)
9372 init_ext_80387_constants ();
9374 for (i = 0; i < 5; i++)
9375 if (real_identical (&r, &ext_80387_constants_table[i]))
9376 return i + 3;
9379 /* A load of the constant -0.0 or -1.0 will be split into an
9380 fldz;fchs or fld1;fchs sequence. */
9381 if (real_isnegzero (&r))
9382 return 8;
9383 if (real_identical (&r, &dconstm1))
9384 return 9;
9386 return 0;
9389 /* Return the opcode of the special instruction to be used to load
9390 the constant X. */
9392 const char *
9393 standard_80387_constant_opcode (rtx x)
9395 switch (standard_80387_constant_p (x))
9397 case 1:
9398 return "fldz";
9399 case 2:
9400 return "fld1";
9401 case 3:
9402 return "fldlg2";
9403 case 4:
9404 return "fldln2";
9405 case 5:
9406 return "fldl2e";
9407 case 6:
9408 return "fldl2t";
9409 case 7:
9410 return "fldpi";
9411 case 8:
9412 case 9:
9413 return "#";
9414 default:
9415 gcc_unreachable ();
9419 /* Return the CONST_DOUBLE representing the 80387 constant that is
9420 loaded by the specified special instruction. The argument IDX
9421 matches the return value from standard_80387_constant_p. */
9424 standard_80387_constant_rtx (int idx)
9426 int i;
9428 if (! ext_80387_constants_init)
9429 init_ext_80387_constants ();
9431 switch (idx)
9433 case 3:
9434 case 4:
9435 case 5:
9436 case 6:
9437 case 7:
9438 i = idx - 3;
9439 break;
9441 default:
9442 gcc_unreachable ();
9445 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9446 XFmode);
9449 /* Return 1 if X is all 0s and 2 if X is all 1s
9450 in a supported SSE/AVX vector mode. */
9453 standard_sse_constant_p (rtx x)
9455 machine_mode mode = GET_MODE (x);
9457 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
9458 return 1;
9459 if (vector_all_ones_operand (x, mode))
9460 switch (mode)
9462 case V16QImode:
9463 case V8HImode:
9464 case V4SImode:
9465 case V2DImode:
9466 if (TARGET_SSE2)
9467 return 2;
9468 case V32QImode:
9469 case V16HImode:
9470 case V8SImode:
9471 case V4DImode:
9472 if (TARGET_AVX2)
9473 return 2;
9474 case V64QImode:
9475 case V32HImode:
9476 case V16SImode:
9477 case V8DImode:
9478 if (TARGET_AVX512F)
9479 return 2;
9480 default:
9481 break;
9484 return 0;
9487 /* Return the opcode of the special instruction to be used to load
9488 the constant X. */
9490 const char *
9491 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9493 switch (standard_sse_constant_p (x))
9495 case 1:
9496 switch (get_attr_mode (insn))
9498 case MODE_XI:
9499 return "vpxord\t%g0, %g0, %g0";
9500 case MODE_V16SF:
9501 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9502 : "vpxord\t%g0, %g0, %g0";
9503 case MODE_V8DF:
9504 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9505 : "vpxorq\t%g0, %g0, %g0";
9506 case MODE_TI:
9507 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9508 : "%vpxor\t%0, %d0";
9509 case MODE_V2DF:
9510 return "%vxorpd\t%0, %d0";
9511 case MODE_V4SF:
9512 return "%vxorps\t%0, %d0";
9514 case MODE_OI:
9515 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9516 : "vpxor\t%x0, %x0, %x0";
9517 case MODE_V4DF:
9518 return "vxorpd\t%x0, %x0, %x0";
9519 case MODE_V8SF:
9520 return "vxorps\t%x0, %x0, %x0";
9522 default:
9523 break;
9526 case 2:
9527 if (TARGET_AVX512VL
9528 || get_attr_mode (insn) == MODE_XI
9529 || get_attr_mode (insn) == MODE_V8DF
9530 || get_attr_mode (insn) == MODE_V16SF)
9531 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9532 if (TARGET_AVX)
9533 return "vpcmpeqd\t%0, %0, %0";
9534 else
9535 return "pcmpeqd\t%0, %0";
9537 default:
9538 break;
9540 gcc_unreachable ();
9543 /* Return true if OP contains a symbol reference. */
9545 bool
9546 symbolic_reference_mentioned_p (rtx op)
9548 const char *fmt;
9549 int i;
9551 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9552 return true;
9554 fmt = GET_RTX_FORMAT (GET_CODE (op));
9555 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9557 if (fmt[i] == 'E')
9559 int j;
9561 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9562 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9563 return true;
9566 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9567 return true;
9570 return false;
9573 /* Return true if it is appropriate to emit `ret' instructions in the
9574 body of a function. Do this only if the epilogue is simple, needing a
9575 couple of insns. Prior to reloading, we can't tell how many registers
9576 must be saved, so return false then. Return false if there is no frame
9577 marker to de-allocate. */
9579 bool
9580 ix86_can_use_return_insn_p (void)
9582 struct ix86_frame frame;
9584 if (! reload_completed || frame_pointer_needed)
9585 return 0;
9587 /* Don't allow more than 32k pop, since that's all we can do
9588 with one instruction. */
9589 if (crtl->args.pops_args && crtl->args.size >= 32768)
9590 return 0;
9592 ix86_compute_frame_layout (&frame);
9593 return (frame.stack_pointer_offset == UNITS_PER_WORD
9594 && (frame.nregs + frame.nsseregs) == 0);
9597 /* Value should be nonzero if functions must have frame pointers.
9598 Zero means the frame pointer need not be set up (and parms may
9599 be accessed via the stack pointer) in functions that seem suitable. */
9601 static bool
9602 ix86_frame_pointer_required (void)
9604 /* If we accessed previous frames, then the generated code expects
9605 to be able to access the saved ebp value in our frame. */
9606 if (cfun->machine->accesses_prev_frame)
9607 return true;
9609 /* Several x86 OSes need a frame pointer for other reasons,
9610 usually pertaining to setjmp. */
9611 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9612 return true;
9614 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
9615 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9616 return true;
9618 /* With Win64 SEH, very large frames need a frame pointer, as the maximum
9619 stack allocation is 4GB. */
9620 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9621 return true;
9623 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9624 turns off the frame pointer by default. Turn it back on now if
9625 we've not got a leaf function. */
9626 if (TARGET_OMIT_LEAF_FRAME_POINTER
9627 && (!crtl->is_leaf
9628 || ix86_current_function_calls_tls_descriptor))
9629 return true;
9631 if (crtl->profile && !flag_fentry)
9632 return true;
9634 return false;
9637 /* Record that the current function accesses previous call frames. */
9639 void
9640 ix86_setup_frame_addresses (void)
9642 cfun->machine->accesses_prev_frame = 1;
9645 #ifndef USE_HIDDEN_LINKONCE
9646 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9647 # define USE_HIDDEN_LINKONCE 1
9648 # else
9649 # define USE_HIDDEN_LINKONCE 0
9650 # endif
9651 #endif
9653 static int pic_labels_used;
9655 /* Fills in the label name that should be used for a pc thunk for
9656 the given register. */
9658 static void
9659 get_pc_thunk_name (char name[32], unsigned int regno)
9661 gcc_assert (!TARGET_64BIT);
9663 if (USE_HIDDEN_LINKONCE)
9664 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9665 else
9666 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
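/* Example (a sketch): with USE_HIDDEN_LINKONCE and regno == BX_REG this
   yields the familiar name "__x86.get_pc_thunk.bx", which ix86_code_end
   below emits roughly as

     __x86.get_pc_thunk.bx:
             movl    (%esp), %ebx
             ret
*/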
9670 /* This function generates code for -fpic that loads %ebx with
9671 the return address of the caller and then returns. */
9673 static void
9674 ix86_code_end (void)
9676 rtx xops[2];
9677 int regno;
9679 for (regno = AX_REG; regno <= SP_REG; regno++)
9681 char name[32];
9682 tree decl;
9684 if (!(pic_labels_used & (1 << regno)))
9685 continue;
9687 get_pc_thunk_name (name, regno);
9689 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9690 get_identifier (name),
9691 build_function_type_list (void_type_node, NULL_TREE));
9692 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9693 NULL_TREE, void_type_node);
9694 TREE_PUBLIC (decl) = 1;
9695 TREE_STATIC (decl) = 1;
9696 DECL_IGNORED_P (decl) = 1;
9698 #if TARGET_MACHO
9699 if (TARGET_MACHO)
9701 switch_to_section (darwin_sections[text_coal_section]);
9702 fputs ("\t.weak_definition\t", asm_out_file);
9703 assemble_name (asm_out_file, name);
9704 fputs ("\n\t.private_extern\t", asm_out_file);
9705 assemble_name (asm_out_file, name);
9706 putc ('\n', asm_out_file);
9707 ASM_OUTPUT_LABEL (asm_out_file, name);
9708 DECL_WEAK (decl) = 1;
9710 else
9711 #endif
9712 if (USE_HIDDEN_LINKONCE)
9714 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9716 targetm.asm_out.unique_section (decl, 0);
9717 switch_to_section (get_named_section (decl, NULL, 0));
9719 targetm.asm_out.globalize_label (asm_out_file, name);
9720 fputs ("\t.hidden\t", asm_out_file);
9721 assemble_name (asm_out_file, name);
9722 putc ('\n', asm_out_file);
9723 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9725 else
9727 switch_to_section (text_section);
9728 ASM_OUTPUT_LABEL (asm_out_file, name);
9731 DECL_INITIAL (decl) = make_node (BLOCK);
9732 current_function_decl = decl;
9733 init_function_start (decl);
9734 first_function_block_is_cold = false;
9735 /* Make sure unwind info is emitted for the thunk if needed. */
9736 final_start_function (emit_barrier (), asm_out_file, 1);
9738 /* Pad stack IP move with 4 instructions (two NOPs count
9739 as one instruction). */
9740 if (TARGET_PAD_SHORT_FUNCTION)
9742 int i = 8;
9744 while (i--)
9745 fputs ("\tnop\n", asm_out_file);
9748 xops[0] = gen_rtx_REG (Pmode, regno);
9749 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9750 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9751 output_asm_insn ("%!ret", NULL);
9752 final_end_function ();
9753 init_insn_lengths ();
9754 free_after_compilation (cfun);
9755 set_cfun (NULL);
9756 current_function_decl = NULL;
9759 if (flag_split_stack)
9760 file_end_indicate_split_stack ();
9763 /* Emit code for the SET_GOT patterns. */
9765 const char *
9766 output_set_got (rtx dest, rtx label)
9768 rtx xops[3];
9770 xops[0] = dest;
9772 if (TARGET_VXWORKS_RTP && flag_pic)
9774 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9775 xops[2] = gen_rtx_MEM (Pmode,
9776 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9777 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9779 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9780 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9781 an unadorned address. */
9782 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9783 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9784 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9785 return "";
9788 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9790 if (!flag_pic)
9792 if (TARGET_MACHO)
9793 /* We don't need a pic base, we're not producing pic. */
9794 gcc_unreachable ();
9796 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9797 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9798 targetm.asm_out.internal_label (asm_out_file, "L",
9799 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9801 else
9803 char name[32];
9804 get_pc_thunk_name (name, REGNO (dest));
9805 pic_labels_used |= 1 << REGNO (dest);
9807 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9808 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9809 output_asm_insn ("%!call\t%X2", xops);
9811 #if TARGET_MACHO
9812 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9813 This is what will be referenced by the Mach-O PIC subsystem. */
9814 if (machopic_should_output_picbase_label () || !label)
9815 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9817 /* When we are restoring the pic base at the site of a nonlocal label,
9818 and we decided to emit the pic base above, we will still output a
9819 local label used for calculating the correction offset (even though
9820 the offset will be 0 in that case). */
9821 if (label)
9822 targetm.asm_out.internal_label (asm_out_file, "L",
9823 CODE_LABEL_NUMBER (label));
9824 #endif
9827 if (!TARGET_MACHO)
9828 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9830 return "";
9833 /* Generate a "push" pattern for input ARG. */
9835 static rtx
9836 gen_push (rtx arg)
9838 struct machine_function *m = cfun->machine;
9840 if (m->fs.cfa_reg == stack_pointer_rtx)
9841 m->fs.cfa_offset += UNITS_PER_WORD;
9842 m->fs.sp_offset += UNITS_PER_WORD;
9844 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9845 arg = gen_rtx_REG (word_mode, REGNO (arg));
9847 return gen_rtx_SET (VOIDmode,
9848 gen_rtx_MEM (word_mode,
9849 gen_rtx_PRE_DEC (Pmode,
9850 stack_pointer_rtx)),
9851 arg);
9854 /* Generate a "pop" pattern for input ARG. */
9856 static rtx
9857 gen_pop (rtx arg)
9859 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9860 arg = gen_rtx_REG (word_mode, REGNO (arg));
9862 return gen_rtx_SET (VOIDmode,
9863 arg,
9864 gen_rtx_MEM (word_mode,
9865 gen_rtx_POST_INC (Pmode,
9866 stack_pointer_rtx)));
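/* For illustration (a sketch): on a 64-bit target gen_push builds RTL of
   the form

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI <arg>))

   and gen_pop the mirror image using post_inc, matching the single x86
   push and pop instructions.  */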
9869 /* Return >= 0 if there is an unused call-clobbered register available
9870 for the entire function. */
9872 static unsigned int
9873 ix86_select_alt_pic_regnum (void)
9875 if (ix86_use_pseudo_pic_reg ())
9876 return INVALID_REGNUM;
9878 if (crtl->is_leaf
9879 && !crtl->profile
9880 && !ix86_current_function_calls_tls_descriptor)
9882 int i, drap;
9883 /* Can't use the same register for both PIC and DRAP. */
9884 if (crtl->drap_reg)
9885 drap = REGNO (crtl->drap_reg);
9886 else
9887 drap = -1;
9888 for (i = 2; i >= 0; --i)
9889 if (i != drap && !df_regs_ever_live_p (i))
9890 return i;
9893 return INVALID_REGNUM;
9896 /* Return TRUE if we need to save REGNO. */
9898 static bool
9899 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9901 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9902 && pic_offset_table_rtx)
9904 if (ix86_use_pseudo_pic_reg ())
9906 /* REAL_PIC_OFFSET_TABLE_REGNUM is used by the call to
9907 _mcount in the prologue. */
9908 if (!TARGET_64BIT && flag_pic && crtl->profile)
9909 return true;
9911 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9912 || crtl->profile
9913 || crtl->calls_eh_return
9914 || crtl->uses_const_pool
9915 || cfun->has_nonlocal_label)
9916 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9919 if (crtl->calls_eh_return && maybe_eh_return)
9921 unsigned i;
9922 for (i = 0; ; i++)
9924 unsigned test = EH_RETURN_DATA_REGNO (i);
9925 if (test == INVALID_REGNUM)
9926 break;
9927 if (test == regno)
9928 return true;
9932 if (crtl->drap_reg
9933 && regno == REGNO (crtl->drap_reg)
9934 && !cfun->machine->no_drap_save_restore)
9935 return true;
9937 return (df_regs_ever_live_p (regno)
9938 && !call_used_regs[regno]
9939 && !fixed_regs[regno]
9940 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9943 /* Return the number of saved general purpose registers. */
9945 static int
9946 ix86_nsaved_regs (void)
9948 int nregs = 0;
9949 int regno;
9951 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9952 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9953 nregs ++;
9954 return nregs;
9957 /* Return the number of saved SSE registers. */
9959 static int
9960 ix86_nsaved_sseregs (void)
9962 int nregs = 0;
9963 int regno;
9965 if (!TARGET_64BIT_MS_ABI)
9966 return 0;
9967 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9968 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9969 nregs ++;
9970 return nregs;
9973 /* Given FROM and TO register numbers, say whether this elimination is
9974 allowed. If stack alignment is needed, we can only replace argument
9975 pointer with hard frame pointer, or replace frame pointer with stack
9976 pointer. Otherwise, frame pointer elimination is automatically
9977 handled and all other eliminations are valid. */
9979 static bool
9980 ix86_can_eliminate (const int from, const int to)
9982 if (stack_realign_fp)
9983 return ((from == ARG_POINTER_REGNUM
9984 && to == HARD_FRAME_POINTER_REGNUM)
9985 || (from == FRAME_POINTER_REGNUM
9986 && to == STACK_POINTER_REGNUM));
9987 else
9988 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9991 /* Return the offset between two registers, one to be eliminated, and the other
9992 its replacement, at the start of a routine. */
9994 HOST_WIDE_INT
9995 ix86_initial_elimination_offset (int from, int to)
9997 struct ix86_frame frame;
9998 ix86_compute_frame_layout (&frame);
10000 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10001 return frame.hard_frame_pointer_offset;
10002 else if (from == FRAME_POINTER_REGNUM
10003 && to == HARD_FRAME_POINTER_REGNUM)
10004 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
10005 else
10007 gcc_assert (to == STACK_POINTER_REGNUM);
10009 if (from == ARG_POINTER_REGNUM)
10010 return frame.stack_pointer_offset;
10012 gcc_assert (from == FRAME_POINTER_REGNUM);
10013 return frame.stack_pointer_offset - frame.frame_pointer_offset;
10017 /* In a dynamically-aligned function, we can't know the offset from
10018 stack pointer to frame pointer, so we must ensure that setjmp
10019 eliminates fp against the hard fp (%ebp) rather than trying to
10020 index from %esp up to the top of the frame across a gap that is
10021 of unknown (at compile-time) size. */
10022 static rtx
10023 ix86_builtin_setjmp_frame_value (void)
10025 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
10028 /* When using -fsplit-stack, the allocation routines set a field in
10029 the TCB to the bottom of the stack plus this much space, measured
10030 in bytes. */
10032 #define SPLIT_STACK_AVAILABLE 256
10034 /* Fill the ix86_frame structure describing the frame of the current function. */
10036 static void
10037 ix86_compute_frame_layout (struct ix86_frame *frame)
10039 unsigned HOST_WIDE_INT stack_alignment_needed;
10040 HOST_WIDE_INT offset;
10041 unsigned HOST_WIDE_INT preferred_alignment;
10042 HOST_WIDE_INT size = get_frame_size ();
10043 HOST_WIDE_INT to_allocate;
10045 frame->nregs = ix86_nsaved_regs ();
10046 frame->nsseregs = ix86_nsaved_sseregs ();
10048 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
10049 for function prologues and leaf functions. */
10050 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
10051 && (!crtl->is_leaf || cfun->calls_alloca != 0
10052 || ix86_current_function_calls_tls_descriptor))
10054 crtl->preferred_stack_boundary = 128;
10055 crtl->stack_alignment_needed = 128;
10057 /* preferred_stack_boundary is never updated for calls
10058 expanded from a tls descriptor. Update it here. We don't update it in
10059 the expand stage because, according to the comments before
10060 ix86_current_function_calls_tls_descriptor, tls calls may be optimized
10061 away. */
10062 else if (ix86_current_function_calls_tls_descriptor
10063 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
10065 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
10066 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
10067 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
10070 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
10071 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
10073 gcc_assert (!size || stack_alignment_needed);
10074 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
10075 gcc_assert (preferred_alignment <= stack_alignment_needed);
10077 /* For SEH we have to limit the amount of code movement into the prologue.
10078 At present we do this via a BLOCKAGE, at which point there's very little
10079 scheduling that can be done, which means that there's very little point
10080 in doing anything except PUSHs. */
10081 if (TARGET_SEH)
10082 cfun->machine->use_fast_prologue_epilogue = false;
10084 /* During the reload iteration the number of registers saved can change.
10085 Recompute the value as needed. Do not recompute when the number of registers
10086 didn't change, as reload does multiple calls to the function and does not
10087 expect the decision to change within a single iteration. */
10088 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
10089 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
10091 int count = frame->nregs;
10092 struct cgraph_node *node = cgraph_node::get (current_function_decl);
10094 cfun->machine->use_fast_prologue_epilogue_nregs = count;
10096 /* The fast prologue uses move instead of push to save registers. This
10097 is significantly longer, but also executes faster as modern hardware
10098 can execute the moves in parallel, but can't do that for push/pop.
10100 Be careful about choosing what prologue to emit: when the function takes
10101 many instructions to execute we may use the slow version, as well as when
10102 the function is known to be outside a hot spot (this is known with
10103 feedback only). Weight the size of the function by the number of registers
10104 to save, as it is cheap to use one or two push instructions but very
10105 slow to use many of them. */
10106 if (count)
10107 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10108 if (node->frequency < NODE_FREQUENCY_NORMAL
10109 || (flag_branch_probabilities
10110 && node->frequency < NODE_FREQUENCY_HOT))
10111 cfun->machine->use_fast_prologue_epilogue = false;
10112 else
10113 cfun->machine->use_fast_prologue_epilogue
10114 = !expensive_function_p (count);
10117 frame->save_regs_using_mov
10118 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10119 /* If static stack checking is enabled and done with probes,
10120 the registers need to be saved before allocating the frame. */
10121 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10123 /* Skip return address. */
10124 offset = UNITS_PER_WORD;
10126 /* Skip pushed static chain. */
10127 if (ix86_static_chain_on_stack)
10128 offset += UNITS_PER_WORD;
10130 /* Skip saved base pointer. */
10131 if (frame_pointer_needed)
10132 offset += UNITS_PER_WORD;
10133 frame->hfp_save_offset = offset;
10135 /* The traditional frame pointer location is at the top of the frame. */
10136 frame->hard_frame_pointer_offset = offset;
10138 /* Register save area */
10139 offset += frame->nregs * UNITS_PER_WORD;
10140 frame->reg_save_offset = offset;
10142 /* On SEH target, registers are pushed just before the frame pointer
10143 location. */
10144 if (TARGET_SEH)
10145 frame->hard_frame_pointer_offset = offset;
10147 /* Align and set SSE register save area. */
10148 if (frame->nsseregs)
10150 /* The only ABI that has saved SSE registers (Win64) also has a
10151 16-byte aligned default stack, and thus we don't need to be
10152 within the re-aligned local stack frame to save them. */
10153 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10154 offset = (offset + 16 - 1) & -16;
10155 offset += frame->nsseregs * 16;
10157 frame->sse_reg_save_offset = offset;
10159 /* The re-aligned stack starts here. Values before this point are not
10160 directly comparable with values below this point. In order to make
10161 sure that no value happens to be the same before and after, force
10162 the alignment computation below to add a non-zero value. */
10163 if (stack_realign_fp)
10164 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10166 /* Va-arg area */
10167 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10168 offset += frame->va_arg_size;
10170 /* Align start of frame for local function. */
10171 if (stack_realign_fp
10172 || offset != frame->sse_reg_save_offset
10173 || size != 0
10174 || !crtl->is_leaf
10175 || cfun->calls_alloca
10176 || ix86_current_function_calls_tls_descriptor)
10177 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10179 /* Frame pointer points here. */
10180 frame->frame_pointer_offset = offset;
10182 offset += size;
10184 /* Add the outgoing arguments area. Can be skipped if we eliminated
10185 all the function calls as dead code.
10186 Skipping is however impossible when the function calls alloca. The alloca
10187 expander assumes that the last crtl->outgoing_args_size bytes
10188 of the stack frame are unused. */
10189 if (ACCUMULATE_OUTGOING_ARGS
10190 && (!crtl->is_leaf || cfun->calls_alloca
10191 || ix86_current_function_calls_tls_descriptor))
10193 offset += crtl->outgoing_args_size;
10194 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10196 else
10197 frame->outgoing_arguments_size = 0;
10199 /* Align stack boundary. Only needed if we're calling another function
10200 or using alloca. */
10201 if (!crtl->is_leaf || cfun->calls_alloca
10202 || ix86_current_function_calls_tls_descriptor)
10203 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10205 /* We've reached end of stack frame. */
10206 frame->stack_pointer_offset = offset;
10208 /* Size prologue needs to allocate. */
10209 to_allocate = offset - frame->sse_reg_save_offset;
10211 if ((!to_allocate && frame->nregs <= 1)
10212 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10213 frame->save_regs_using_mov = false;
10215 if (ix86_using_red_zone ()
10216 && crtl->sp_is_unchanging
10217 && crtl->is_leaf
10218 && !ix86_current_function_calls_tls_descriptor)
10220 frame->red_zone_size = to_allocate;
10221 if (frame->save_regs_using_mov)
10222 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10223 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10224 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10226 else
10227 frame->red_zone_size = 0;
10228 frame->stack_pointer_offset -= frame->red_zone_size;
10230 /* The SEH frame pointer location is near the bottom of the frame.
10231 This is enforced by the fact that the difference between the
10232 stack pointer and the frame pointer is limited to 240 bytes in
10233 the unwind data structure. */
10234 if (TARGET_SEH)
10236 HOST_WIDE_INT diff;
10238 /* If we can leave the frame pointer where it is, do so. Also, returns
10239 the establisher frame for __builtin_frame_address (0). */
10240 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10241 if (diff <= SEH_MAX_FRAME_SIZE
10242 && (diff > 240 || (diff & 15) != 0)
10243 && !crtl->accesses_prior_frames)
10245 /* Ideally we'd determine what portion of the local stack frame
10246 (within the constraint of the lowest 240) is most heavily used.
10247 But without that complication, simply bias the frame pointer
10248 by 128 bytes so as to maximize the amount of the local stack
10249 frame that is addressable with 8-bit offsets. */
10250 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
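/* Summary sketch of the ordering just computed, from the CFA downwards
   (arrows give the approximate position of each recorded offset):

     return address
     [pushed static chain]
     [saved frame pointer]              <- hard_frame_pointer_offset (non-SEH)
     saved GP registers                 <- reg_save_offset
     saved SSE registers, 16-aligned    <- sse_reg_save_offset
     va_arg register save area
     local variables                    <- frame_pointer_offset (top of locals)
     outgoing arguments                 <- stack_pointer_offset
     [red zone, below the final stack pointer]
*/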
10255 /* This is semi-inlined memory_address_length, but simplified
10256 since we know that we're always dealing with reg+offset, and
10257 to avoid having to create and discard all that rtl. */
10259 static inline int
10260 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10262 int len = 4;
10264 if (offset == 0)
10266 /* EBP and R13 cannot be encoded without an offset. */
10267 len = (regno == BP_REG || regno == R13_REG);
10269 else if (IN_RANGE (offset, -128, 127))
10270 len = 1;
10272 /* ESP and R12 must be encoded with a SIB byte. */
10273 if (regno == SP_REG || regno == R12_REG)
10274 len++;
10276 return len;
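/* A few resulting lengths, as a sketch (only the displacement/SIB bytes
   are counted, per the simplification described above):

     (%eax)      -> 0   // no displacement needed
     (%ebp)      -> 1   // EBP always takes a disp8, even for 0
     8(%esp)     -> 2   // disp8 plus the mandatory SIB byte
     1024(%ebx)  -> 4   // disp32
*/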
10279 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10280 The valid base registers are taken from CFUN->MACHINE->FS. */
10282 static rtx
10283 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10285 const struct machine_function *m = cfun->machine;
10286 rtx base_reg = NULL;
10287 HOST_WIDE_INT base_offset = 0;
10289 if (m->use_fast_prologue_epilogue)
10291 /* Choose the base register most likely to allow the most scheduling
10292 opportunities. Generally FP is valid throughout the function,
10293 while DRAP must be reloaded within the epilogue. But choose either
10294 over the SP due to increased encoding size. */
10296 if (m->fs.fp_valid)
10298 base_reg = hard_frame_pointer_rtx;
10299 base_offset = m->fs.fp_offset - cfa_offset;
10301 else if (m->fs.drap_valid)
10303 base_reg = crtl->drap_reg;
10304 base_offset = 0 - cfa_offset;
10306 else if (m->fs.sp_valid)
10308 base_reg = stack_pointer_rtx;
10309 base_offset = m->fs.sp_offset - cfa_offset;
10312 else
10314 HOST_WIDE_INT toffset;
10315 int len = 16, tlen;
10317 /* Choose the base register with the smallest address encoding.
10318 With a tie, choose FP > DRAP > SP. */
10319 if (m->fs.sp_valid)
10321 base_reg = stack_pointer_rtx;
10322 base_offset = m->fs.sp_offset - cfa_offset;
10323 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10325 if (m->fs.drap_valid)
10327 toffset = 0 - cfa_offset;
10328 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10329 if (tlen <= len)
10331 base_reg = crtl->drap_reg;
10332 base_offset = toffset;
10333 len = tlen;
10336 if (m->fs.fp_valid)
10338 toffset = m->fs.fp_offset - cfa_offset;
10339 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10340 if (tlen <= len)
10342 base_reg = hard_frame_pointer_rtx;
10343 base_offset = toffset;
10344 len = tlen;
10348 gcc_assert (base_reg != NULL);
10350 return plus_constant (Pmode, base_reg, base_offset);
10353 /* Emit code to save registers in the prologue. */
10355 static void
10356 ix86_emit_save_regs (void)
10358 unsigned int regno;
10359 rtx insn;
10361 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10362 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10364 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10365 RTX_FRAME_RELATED_P (insn) = 1;
10369 /* Emit a single register save at CFA - CFA_OFFSET. */
10371 static void
10372 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10373 HOST_WIDE_INT cfa_offset)
10375 struct machine_function *m = cfun->machine;
10376 rtx reg = gen_rtx_REG (mode, regno);
10377 rtx mem, addr, base, insn;
10379 addr = choose_baseaddr (cfa_offset);
10380 mem = gen_frame_mem (mode, addr);
10382 /* For SSE saves, we need to indicate the 128-bit alignment. */
10383 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10385 insn = emit_move_insn (mem, reg);
10386 RTX_FRAME_RELATED_P (insn) = 1;
10388 base = addr;
10389 if (GET_CODE (base) == PLUS)
10390 base = XEXP (base, 0);
10391 gcc_checking_assert (REG_P (base));
10393 /* When saving registers into a re-aligned local stack frame, avoid
10394 any tricky guessing by dwarf2out. */
10395 if (m->fs.realigned)
10397 gcc_checking_assert (stack_realign_drap);
10399 if (regno == REGNO (crtl->drap_reg))
10401 /* A bit of a hack. We force the DRAP register to be saved in
10402 the re-aligned stack frame, which provides us with a copy
10403 of the CFA that will last past the prologue. Install it. */
10404 gcc_checking_assert (cfun->machine->fs.fp_valid);
10405 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10406 cfun->machine->fs.fp_offset - cfa_offset);
10407 mem = gen_rtx_MEM (mode, addr);
10408 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10410 else
10412 /* The frame pointer is a stable reference within the
10413 aligned frame. Use it. */
10414 gcc_checking_assert (cfun->machine->fs.fp_valid);
10415 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10416 cfun->machine->fs.fp_offset - cfa_offset);
10417 mem = gen_rtx_MEM (mode, addr);
10418 add_reg_note (insn, REG_CFA_EXPRESSION,
10419 gen_rtx_SET (VOIDmode, mem, reg));
10423 /* The memory may not be relative to the current CFA register,
10424 which means that we may need to generate a new pattern for
10425 use by the unwind info. */
10426 else if (base != m->fs.cfa_reg)
10428 addr = plus_constant (Pmode, m->fs.cfa_reg,
10429 m->fs.cfa_offset - cfa_offset);
10430 mem = gen_rtx_MEM (mode, addr);
10431 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10435 /* Emit code to save registers using MOV insns.
10436 First register is stored at CFA - CFA_OFFSET. */
10437 static void
10438 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10440 unsigned int regno;
10442 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10443 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10445 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10446 cfa_offset -= UNITS_PER_WORD;
10450 /* Emit code to save SSE registers using MOV insns.
10451 First register is stored at CFA - CFA_OFFSET. */
10452 static void
10453 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10455 unsigned int regno;
10457 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10458 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10460 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10461 cfa_offset -= 16;
10465 static GTY(()) rtx queued_cfa_restores;
10467 /* Add a REG_CFA_RESTORE note for REG to INSN, or queue it until the next
10468 stack manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10469 Don't add the note if the previously saved value will be left untouched
10470 within the stack red zone until return, as unwinders can find the same value
10471 in the register and on the stack. */
10473 static void
10474 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
10476 if (!crtl->shrink_wrapped
10477 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10478 return;
10480 if (insn)
10482 add_reg_note (insn, REG_CFA_RESTORE, reg);
10483 RTX_FRAME_RELATED_P (insn) = 1;
10485 else
10486 queued_cfa_restores
10487 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10490 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10492 static void
10493 ix86_add_queued_cfa_restore_notes (rtx insn)
10495 rtx last;
10496 if (!queued_cfa_restores)
10497 return;
10498 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10500 XEXP (last, 1) = REG_NOTES (insn);
10501 REG_NOTES (insn) = queued_cfa_restores;
10502 queued_cfa_restores = NULL_RTX;
10503 RTX_FRAME_RELATED_P (insn) = 1;
10506 /* Expand prologue or epilogue stack adjustment.
10507 The pattern exists to put a dependency on all ebp-based memory accesses.
10508 STYLE should be negative if instructions should be marked as frame related,
10509 zero if the %r11 register is live and cannot be freely used, and positive
10510 otherwise. */
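/* Illustrative summary, as can be seen from the calls below: the prologue
   passes a STYLE of -1 so the adjustment is marked frame related, while
   ix86_expand_epilogue passes its own STYLE argument, which is 0 for sibcall
   epilogues where %r11 may still be needed by an indirect sibcall return.  */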
10512 static void
10513 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10514 int style, bool set_cfa)
10516 struct machine_function *m = cfun->machine;
10517 rtx insn;
10518 bool add_frame_related_expr = false;
10520 if (Pmode == SImode)
10521 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10522 else if (x86_64_immediate_operand (offset, DImode))
10523 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10524 else
10526 rtx tmp;
10527 /* r11 is used by indirect sibcall return as well, set before the
10528 epilogue and used after the epilogue. */
10529 if (style)
10530 tmp = gen_rtx_REG (DImode, R11_REG);
10531 else
10533 gcc_assert (src != hard_frame_pointer_rtx
10534 && dest != hard_frame_pointer_rtx);
10535 tmp = hard_frame_pointer_rtx;
10537 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10538 if (style < 0)
10539 add_frame_related_expr = true;
10541 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10544 insn = emit_insn (insn);
10545 if (style >= 0)
10546 ix86_add_queued_cfa_restore_notes (insn);
10548 if (set_cfa)
10550 rtx r;
10552 gcc_assert (m->fs.cfa_reg == src);
10553 m->fs.cfa_offset += INTVAL (offset);
10554 m->fs.cfa_reg = dest;
10556 r = gen_rtx_PLUS (Pmode, src, offset);
10557 r = gen_rtx_SET (VOIDmode, dest, r);
10558 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10559 RTX_FRAME_RELATED_P (insn) = 1;
10561 else if (style < 0)
10563 RTX_FRAME_RELATED_P (insn) = 1;
10564 if (add_frame_related_expr)
10566 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10567 r = gen_rtx_SET (VOIDmode, dest, r);
10568 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10572 if (dest == stack_pointer_rtx)
10574 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10575 bool valid = m->fs.sp_valid;
10577 if (src == hard_frame_pointer_rtx)
10579 valid = m->fs.fp_valid;
10580 ooffset = m->fs.fp_offset;
10582 else if (src == crtl->drap_reg)
10584 valid = m->fs.drap_valid;
10585 ooffset = 0;
10587 else
10589 /* Else there are two possibilities: SP itself, which we set
10590 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
10591 taken care of by hand along the eh_return path. */
10592 gcc_checking_assert (src == stack_pointer_rtx
10593 || offset == const0_rtx);
10596 m->fs.sp_offset = ooffset - INTVAL (offset);
10597 m->fs.sp_valid = valid;
10601 /* Find an available register to be used as the dynamic realign argument
10602 pointer register. Such a register will be written in the prologue and
10603 used at the beginning of the body, so it must not be
10604 1. a parameter passing register.
10605 2. the GOT pointer.
10606 We reuse the static-chain register if it is available. Otherwise, we
10607 use DI for i386 and R13 for x86-64. We chose R13 since it has
10608 shorter encoding.
10610 Return: the regno of the chosen register. */
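/* For instance (illustrative): a 32-bit cdecl function with no static chain
   and regparm <= 2 reuses ECX; a fastcall or thiscall function falls back to
   EDI; on x86-64, nested functions (or functions that have emitted tail
   calls) get R13 and everything else gets R10.  */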
10612 static unsigned int
10613 find_drap_reg (void)
10615 tree decl = cfun->decl;
10617 if (TARGET_64BIT)
10619 /* Use R13 for a nested function or a function that needs a static chain.
10620 Since a function with a tail call may use any caller-saved
10621 register in the epilogue, DRAP must not use a caller-saved
10622 register in that case. */
10623 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10624 return R13_REG;
10626 return R10_REG;
10628 else
10630 /* Use DI for a nested function or a function that needs a static chain.
10631 Since a function with a tail call may use any caller-saved
10632 register in the epilogue, DRAP must not use a caller-saved
10633 register in that case. */
10634 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10635 return DI_REG;
10637 /* Reuse static chain register if it isn't used for parameter
10638 passing. */
10639 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10641 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10642 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10643 return CX_REG;
10645 return DI_REG;
10649 /* Return minimum incoming stack alignment. */
10651 static unsigned int
10652 ix86_minimum_incoming_stack_boundary (bool sibcall)
10654 unsigned int incoming_stack_boundary;
10656 /* Prefer the one specified at command line. */
10657 if (ix86_user_incoming_stack_boundary)
10658 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10659 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10660 if -mstackrealign is used, this is not a sibcall check, and the
10661 estimated stack alignment is 128 bits. */
10662 else if (!sibcall
10663 && !TARGET_64BIT
10664 && ix86_force_align_arg_pointer
10665 && crtl->stack_alignment_estimated == 128)
10666 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10667 else
10668 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10670 /* Incoming stack alignment can be changed on individual functions
10671 via force_align_arg_pointer attribute. We use the smallest
10672 incoming stack boundary. */
10673 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10674 && lookup_attribute (ix86_force_align_arg_pointer_string,
10675 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10676 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10678 /* The incoming stack frame has to be aligned at least at
10679 parm_stack_boundary. */
10680 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10681 incoming_stack_boundary = crtl->parm_stack_boundary;
10683 /* The stack at the entry of main is aligned by the runtime. We use the
10684 smallest incoming stack boundary. */
10685 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10686 && DECL_NAME (current_function_decl)
10687 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10688 && DECL_FILE_SCOPE_P (current_function_decl))
10689 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10691 return incoming_stack_boundary;
10694 /* Update incoming stack boundary and estimated stack alignment. */
10696 static void
10697 ix86_update_stack_boundary (void)
10699 ix86_incoming_stack_boundary
10700 = ix86_minimum_incoming_stack_boundary (false);
10702 /* x86_64 varargs need 16-byte stack alignment for the register save
10703 area. */
10704 if (TARGET_64BIT
10705 && cfun->stdarg
10706 && crtl->stack_alignment_estimated < 128)
10707 crtl->stack_alignment_estimated = 128;
10710 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10711 needed or an rtx for DRAP otherwise. */
10713 static rtx
10714 ix86_get_drap_rtx (void)
10716 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10717 crtl->need_drap = true;
10719 if (stack_realign_drap)
10721 /* Assign DRAP to vDRAP and return vDRAP. */
10722 unsigned int regno = find_drap_reg ();
10723 rtx drap_vreg;
10724 rtx arg_ptr;
10725 rtx_insn *seq, *insn;
10727 arg_ptr = gen_rtx_REG (Pmode, regno);
10728 crtl->drap_reg = arg_ptr;
10730 start_sequence ();
10731 drap_vreg = copy_to_reg (arg_ptr);
10732 seq = get_insns ();
10733 end_sequence ();
10735 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10736 if (!optimize)
10738 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10739 RTX_FRAME_RELATED_P (insn) = 1;
10741 return drap_vreg;
10743 else
10744 return NULL;
10747 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10749 static rtx
10750 ix86_internal_arg_pointer (void)
10752 return virtual_incoming_args_rtx;
10755 struct scratch_reg {
10756 rtx reg;
10757 bool saved;
10760 /* Return a short-lived scratch register for use on function entry.
10761 In 32-bit mode, it is valid only after the registers are saved
10762 in the prologue. This register must be released by means of
10763 release_scratch_register_on_entry once it is dead. */
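/* Typical usage, as in the stack probing routines below (sketch):

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ...emit insns that use sr.reg...
     release_scratch_register_on_entry (&sr);  */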
10765 static void
10766 get_scratch_register_on_entry (struct scratch_reg *sr)
10768 int regno;
10770 sr->saved = false;
10772 if (TARGET_64BIT)
10774 /* We always use R11 in 64-bit mode. */
10775 regno = R11_REG;
10777 else
10779 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10780 bool fastcall_p
10781 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10782 bool thiscall_p
10783 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10784 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10785 int regparm = ix86_function_regparm (fntype, decl);
10786 int drap_regno
10787 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10789 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10790 for the static chain register. */
10791 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10792 && drap_regno != AX_REG)
10793 regno = AX_REG;
10794 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10795 for the static chain register. */
10796 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10797 regno = AX_REG;
10798 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10799 regno = DX_REG;
10800 /* ecx is the static chain register. */
10801 else if (regparm < 3 && !fastcall_p && !thiscall_p
10802 && !static_chain_p
10803 && drap_regno != CX_REG)
10804 regno = CX_REG;
10805 else if (ix86_save_reg (BX_REG, true))
10806 regno = BX_REG;
10807 /* esi is the static chain register. */
10808 else if (!(regparm == 3 && static_chain_p)
10809 && ix86_save_reg (SI_REG, true))
10810 regno = SI_REG;
10811 else if (ix86_save_reg (DI_REG, true))
10812 regno = DI_REG;
10813 else
10815 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10816 sr->saved = true;
10820 sr->reg = gen_rtx_REG (Pmode, regno);
10821 if (sr->saved)
10823 rtx insn = emit_insn (gen_push (sr->reg));
10824 RTX_FRAME_RELATED_P (insn) = 1;
10828 /* Release a scratch register obtained from the preceding function. */
10830 static void
10831 release_scratch_register_on_entry (struct scratch_reg *sr)
10833 if (sr->saved)
10835 struct machine_function *m = cfun->machine;
10836 rtx x, insn = emit_insn (gen_pop (sr->reg));
10838 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10839 RTX_FRAME_RELATED_P (insn) = 1;
10840 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10841 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10842 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10843 m->fs.sp_offset -= UNITS_PER_WORD;
10847 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10849 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10851 static void
10852 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10854 /* We skip the probe for the first interval + a small dope of 4 words and
10855 probe that many bytes past the specified size to maintain a protection
10856 area at the bottom of the stack. */
10857 const int dope = 4 * UNITS_PER_WORD;
10858 rtx size_rtx = GEN_INT (size), last;
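/* Note on the arithmetic below (derived from the code): each path first
   over-adjusts SP by an extra PROBE_INTERVAL + dope bytes and then adjusts
   back by the same amount at the end, so the net effect is that SP ends up
   exactly SIZE bytes lower while every intervening interval has been
   probed.  */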
10860 /* See if we have a constant small number of probes to generate. If so,
10861 that's the easy case. The run-time loop is made up of 11 insns in the
10862 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10863 for n # of intervals. */
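/* Illustrative arithmetic: with n = 5 intervals the unrolled sequence costs
   3 + 2*4 = 11 insns, the same as the generic loop, which is why the
   cut-off below is 5 * PROBE_INTERVAL.  */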
10864 if (size <= 5 * PROBE_INTERVAL)
10866 HOST_WIDE_INT i, adjust;
10867 bool first_probe = true;
10869 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10870 values of N from 1 until it exceeds SIZE. If only one probe is
10871 needed, this will not generate any code. Then adjust and probe
10872 to PROBE_INTERVAL + SIZE. */
10873 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10875 if (first_probe)
10877 adjust = 2 * PROBE_INTERVAL + dope;
10878 first_probe = false;
10880 else
10881 adjust = PROBE_INTERVAL;
10883 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10884 plus_constant (Pmode, stack_pointer_rtx,
10885 -adjust)));
10886 emit_stack_probe (stack_pointer_rtx);
10889 if (first_probe)
10890 adjust = size + PROBE_INTERVAL + dope;
10891 else
10892 adjust = size + PROBE_INTERVAL - i;
10894 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10895 plus_constant (Pmode, stack_pointer_rtx,
10896 -adjust)));
10897 emit_stack_probe (stack_pointer_rtx);
10899 /* Adjust back to account for the additional first interval. */
10900 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10901 plus_constant (Pmode, stack_pointer_rtx,
10902 PROBE_INTERVAL + dope)));
10905 /* Otherwise, do the same as above, but in a loop. Note that we must be
10906 extra careful with variables wrapping around because we might be at
10907 the very top (or the very bottom) of the address space and we have
10908 to be able to handle this case properly; in particular, we use an
10909 equality test for the loop condition. */
10910 else
10912 HOST_WIDE_INT rounded_size;
10913 struct scratch_reg sr;
10915 get_scratch_register_on_entry (&sr);
10918 /* Step 1: round SIZE to the previous multiple of the interval. */
10920 rounded_size = size & -PROBE_INTERVAL;
10923 /* Step 2: compute initial and final value of the loop counter. */
10925 /* SP = SP_0 + PROBE_INTERVAL. */
10926 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10927 plus_constant (Pmode, stack_pointer_rtx,
10928 - (PROBE_INTERVAL + dope))));
10930 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10931 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10932 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10933 gen_rtx_PLUS (Pmode, sr.reg,
10934 stack_pointer_rtx)));
10937 /* Step 3: the loop
10939 while (SP != LAST_ADDR)
10941 SP = SP + PROBE_INTERVAL
10942 probe at SP
10945 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10946 values of N from 1 until it is equal to ROUNDED_SIZE. */
10948 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10951 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10952 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10954 if (size != rounded_size)
10956 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10957 plus_constant (Pmode, stack_pointer_rtx,
10958 rounded_size - size)));
10959 emit_stack_probe (stack_pointer_rtx);
10962 /* Adjust back to account for the additional first interval. */
10963 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10964 plus_constant (Pmode, stack_pointer_rtx,
10965 PROBE_INTERVAL + dope)));
10967 release_scratch_register_on_entry (&sr);
10970 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10972 /* Even if the stack pointer isn't the CFA register, we need to correctly
10973 describe the adjustments made to it, in particular differentiate the
10974 frame-related ones from the frame-unrelated ones. */
10975 if (size > 0)
10977 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10978 XVECEXP (expr, 0, 0)
10979 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10980 plus_constant (Pmode, stack_pointer_rtx, -size));
10981 XVECEXP (expr, 0, 1)
10982 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10983 plus_constant (Pmode, stack_pointer_rtx,
10984 PROBE_INTERVAL + dope + size));
10985 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10986 RTX_FRAME_RELATED_P (last) = 1;
10988 cfun->machine->fs.sp_offset += size;
10991 /* Make sure nothing is scheduled before we are done. */
10992 emit_insn (gen_blockage ());
10995 /* Adjust the stack pointer up to REG while probing it. */
10997 const char *
10998 output_adjust_stack_and_probe (rtx reg)
11000 static int labelno = 0;
11001 char loop_lab[32], end_lab[32];
11002 rtx xops[2];
11004 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11005 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11007 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11009 /* Jump to END_LAB if SP == LAST_ADDR. */
11010 xops[0] = stack_pointer_rtx;
11011 xops[1] = reg;
11012 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11013 fputs ("\tje\t", asm_out_file);
11014 assemble_name_raw (asm_out_file, end_lab);
11015 fputc ('\n', asm_out_file);
11017 /* SP = SP + PROBE_INTERVAL. */
11018 xops[1] = GEN_INT (PROBE_INTERVAL);
11019 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11021 /* Probe at SP. */
11022 xops[1] = const0_rtx;
11023 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
11025 fprintf (asm_out_file, "\tjmp\t");
11026 assemble_name_raw (asm_out_file, loop_lab);
11027 fputc ('\n', asm_out_file);
11029 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11031 return "";
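/* Illustrative output of the above in 64-bit AT&T syntax, assuming the
   scratch register is %r11 and PROBE_INTERVAL is 4096:

     .LPSRL0:  cmpq  %r11, %rsp
               je    .LPSRE0
               subq  $4096, %rsp
               orq   $0, (%rsp)
               jmp   .LPSRL0
     .LPSRE0:  */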
11034 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
11035 inclusive. These are offsets from the current stack pointer. */
11037 static void
11038 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
11040 /* See if we have a constant small number of probes to generate. If so,
11041 that's the easy case. The run-time loop is made up of 7 insns in the
11042 generic case while the compile-time loop is made up of n insns for n #
11043 of intervals. */
11044 if (size <= 7 * PROBE_INTERVAL)
11046 HOST_WIDE_INT i;
11048 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
11049 it exceeds SIZE. If only one probe is needed, this will not
11050 generate any code. Then probe at FIRST + SIZE. */
11051 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11052 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11053 -(first + i)));
11055 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11056 -(first + size)));
11059 /* Otherwise, do the same as above, but in a loop. Note that we must be
11060 extra careful with variables wrapping around because we might be at
11061 the very top (or the very bottom) of the address space and we have
11062 to be able to handle this case properly; in particular, we use an
11063 equality test for the loop condition. */
11064 else
11066 HOST_WIDE_INT rounded_size, last;
11067 struct scratch_reg sr;
11069 get_scratch_register_on_entry (&sr);
11072 /* Step 1: round SIZE to the previous multiple of the interval. */
11074 rounded_size = size & -PROBE_INTERVAL;
11077 /* Step 2: compute initial and final value of the loop counter. */
11079 /* TEST_OFFSET = FIRST. */
11080 emit_move_insn (sr.reg, GEN_INT (-first));
11082 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
11083 last = first + rounded_size;
11086 /* Step 3: the loop
11088 while (TEST_ADDR != LAST_ADDR)
11090 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
11091 probe at TEST_ADDR
11094 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
11095 until it is equal to ROUNDED_SIZE. */
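/* Worked example (illustrative, assuming PROBE_INTERVAL is 4096, FIRST is
   16384 and SIZE is 40000): ROUNDED_SIZE is 36864, so the loop probes every
   4096 bytes from SP - 20480 down to SP - 53248, and Step 4 below adds a
   final probe at SP - 56384.  */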
11097 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
11100 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
11101 that SIZE is equal to ROUNDED_SIZE. */
11103 if (size != rounded_size)
11104 emit_stack_probe (plus_constant (Pmode,
11105 gen_rtx_PLUS (Pmode,
11106 stack_pointer_rtx,
11107 sr.reg),
11108 rounded_size - size));
11110 release_scratch_register_on_entry (&sr);
11113 /* Make sure nothing is scheduled before we are done. */
11114 emit_insn (gen_blockage ());
11117 /* Probe a range of stack addresses from REG to END, inclusive. These are
11118 offsets from the current stack pointer. */
11120 const char *
11121 output_probe_stack_range (rtx reg, rtx end)
11123 static int labelno = 0;
11124 char loop_lab[32], end_lab[32];
11125 rtx xops[3];
11127 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11128 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11130 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11132 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11133 xops[0] = reg;
11134 xops[1] = end;
11135 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11136 fputs ("\tje\t", asm_out_file);
11137 assemble_name_raw (asm_out_file, end_lab);
11138 fputc ('\n', asm_out_file);
11140 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11141 xops[1] = GEN_INT (PROBE_INTERVAL);
11142 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11144 /* Probe at TEST_ADDR. */
11145 xops[0] = stack_pointer_rtx;
11146 xops[1] = reg;
11147 xops[2] = const0_rtx;
11148 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11150 fprintf (asm_out_file, "\tjmp\t");
11151 assemble_name_raw (asm_out_file, loop_lab);
11152 fputc ('\n', asm_out_file);
11154 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11156 return "";
11159 /* Finalize the stack_realign_needed flag, which guides the prologue/epilogue
11160 to be generated in the correct form. */
11161 static void
11162 ix86_finalize_stack_realign_flags (void)
11164 /* Check if stack realignment is really needed after reload, and
11165 store the result in cfun. */
11166 unsigned int incoming_stack_boundary
11167 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11168 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11169 unsigned int stack_realign = (incoming_stack_boundary
11170 < (crtl->is_leaf
11171 ? crtl->max_used_stack_slot_alignment
11172 : crtl->stack_alignment_needed));
11174 if (crtl->stack_realign_finalized)
11176 /* After stack_realign_needed is finalized, we can no longer
11177 change it. */
11178 gcc_assert (crtl->stack_realign_needed == stack_realign);
11179 return;
11182 /* If the only reason for frame_pointer_needed is that we conservatively
11183 assumed stack realignment might be needed, but in the end nothing that
11184 needed the stack alignment had been spilled, clear frame_pointer_needed
11185 and say we don't need stack realignment. */
11186 if (stack_realign
11187 && frame_pointer_needed
11188 && crtl->is_leaf
11189 && flag_omit_frame_pointer
11190 && crtl->sp_is_unchanging
11191 && !ix86_current_function_calls_tls_descriptor
11192 && !crtl->accesses_prior_frames
11193 && !cfun->calls_alloca
11194 && !crtl->calls_eh_return
11195 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11196 && !ix86_frame_pointer_required ()
11197 && get_frame_size () == 0
11198 && ix86_nsaved_sseregs () == 0
11199 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11201 HARD_REG_SET set_up_by_prologue, prologue_used;
11202 basic_block bb;
11204 CLEAR_HARD_REG_SET (prologue_used);
11205 CLEAR_HARD_REG_SET (set_up_by_prologue);
11206 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11207 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11208 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11209 HARD_FRAME_POINTER_REGNUM);
11210 FOR_EACH_BB_FN (bb, cfun)
11212 rtx_insn *insn;
11213 FOR_BB_INSNS (bb, insn)
11214 if (NONDEBUG_INSN_P (insn)
11215 && requires_stack_frame_p (insn, prologue_used,
11216 set_up_by_prologue))
11218 crtl->stack_realign_needed = stack_realign;
11219 crtl->stack_realign_finalized = true;
11220 return;
11224 /* If drap has been set, but it actually isn't live at the start
11225 of the function, there is no reason to set it up. */
11226 if (crtl->drap_reg)
11228 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11229 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11231 crtl->drap_reg = NULL_RTX;
11232 crtl->need_drap = false;
11235 else
11236 cfun->machine->no_drap_save_restore = true;
11238 frame_pointer_needed = false;
11239 stack_realign = false;
11240 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11241 crtl->stack_alignment_needed = incoming_stack_boundary;
11242 crtl->stack_alignment_estimated = incoming_stack_boundary;
11243 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11244 crtl->preferred_stack_boundary = incoming_stack_boundary;
11245 df_finish_pass (true);
11246 df_scan_alloc (NULL);
11247 df_scan_blocks ();
11248 df_compute_regs_ever_live (true);
11249 df_analyze ();
11252 crtl->stack_realign_needed = stack_realign;
11253 crtl->stack_realign_finalized = true;
11256 /* Delete SET_GOT right after entry block if it is allocated to reg. */
11258 static void
11259 ix86_elim_entry_set_got (rtx reg)
11261 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11262 rtx_insn *c_insn = BB_HEAD (bb);
11263 if (!NONDEBUG_INSN_P (c_insn))
11264 c_insn = next_nonnote_nondebug_insn (c_insn);
11265 if (c_insn && NONJUMP_INSN_P (c_insn))
11267 rtx pat = PATTERN (c_insn);
11268 if (GET_CODE (pat) == PARALLEL)
11270 rtx vec = XVECEXP (pat, 0, 0);
11271 if (GET_CODE (vec) == SET
11272 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11273 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11274 delete_insn (c_insn);
11279 /* Expand the prologue into a bunch of separate insns. */
11281 void
11282 ix86_expand_prologue (void)
11284 struct machine_function *m = cfun->machine;
11285 rtx insn, t;
11286 struct ix86_frame frame;
11287 HOST_WIDE_INT allocate;
11288 bool int_registers_saved;
11289 bool sse_registers_saved;
11291 ix86_finalize_stack_realign_flags ();
11293 /* DRAP should not coexist with stack_realign_fp */
11294 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11296 memset (&m->fs, 0, sizeof (m->fs));
11298 /* Initialize CFA state for before the prologue. */
11299 m->fs.cfa_reg = stack_pointer_rtx;
11300 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11302 /* Track SP offset to the CFA. We continue tracking this after we've
11303 swapped the CFA register away from SP. In the case of re-alignment
11304 this is fudged; we're interested in offsets within the local frame. */
11305 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11306 m->fs.sp_valid = true;
11308 ix86_compute_frame_layout (&frame);
11310 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11312 /* We should have already generated an error for any use of
11313 ms_hook on a nested function. */
11314 gcc_checking_assert (!ix86_static_chain_on_stack);
11316 /* Check if profiling is active and we shall use the profiling-before-
11317 prologue variant. If so, issue a sorry. */
11318 if (crtl->profile && flag_fentry != 0)
11319 sorry ("ms_hook_prologue attribute isn%'t compatible "
11320 "with -mfentry for 32-bit");
11322 /* In ix86_asm_output_function_label we emitted:
11323 8b ff movl.s %edi,%edi
11324 55 push %ebp
11325 8b ec movl.s %esp,%ebp
11327 This matches the hookable function prologue in Win32 API
11328 functions in Microsoft Windows XP Service Pack 2 and newer.
11329 Wine uses this to enable Windows apps to hook the Win32 API
11330 functions provided by Wine.
11332 What that means is that we've already set up the frame pointer. */
11334 if (frame_pointer_needed
11335 && !(crtl->drap_reg && crtl->stack_realign_needed))
11337 rtx push, mov;
11339 /* We've decided to use the frame pointer already set up.
11340 Describe this to the unwinder by pretending that both
11341 push and mov insns happen right here.
11343 Putting the unwind info here at the end of the ms_hook
11344 is done so that we can make absolutely certain we get
11345 the required byte sequence at the start of the function,
11346 rather than relying on an assembler that can produce
11347 the exact encoding required.
11349 However it does mean (in the unpatched case) that we have
11350 a 1 insn window where the asynchronous unwind info is
11351 incorrect. However, if we placed the unwind info at
11352 its correct location we would have incorrect unwind info
11353 in the patched case. Which is probably all moot since
11354 I don't expect Wine generates dwarf2 unwind info for the
11355 system libraries that use this feature. */
11357 insn = emit_insn (gen_blockage ());
11359 push = gen_push (hard_frame_pointer_rtx);
11360 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
11361 stack_pointer_rtx);
11362 RTX_FRAME_RELATED_P (push) = 1;
11363 RTX_FRAME_RELATED_P (mov) = 1;
11365 RTX_FRAME_RELATED_P (insn) = 1;
11366 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11367 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11369 /* Note that gen_push incremented m->fs.cfa_offset, even
11370 though we didn't emit the push insn here. */
11371 m->fs.cfa_reg = hard_frame_pointer_rtx;
11372 m->fs.fp_offset = m->fs.cfa_offset;
11373 m->fs.fp_valid = true;
11375 else
11377 /* The frame pointer is not needed so pop %ebp again.
11378 This leaves us with a pristine state. */
11379 emit_insn (gen_pop (hard_frame_pointer_rtx));
11383 /* The first insn of a function that accepts its static chain on the
11384 stack is to push the register that would be filled in by a direct
11385 call. This insn will be skipped by the trampoline. */
11386 else if (ix86_static_chain_on_stack)
11388 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11389 emit_insn (gen_blockage ());
11391 /* We don't want to interpret this push insn as a register save,
11392 only as a stack adjustment. The real copy of the register as
11393 a save will be done later, if needed. */
11394 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11395 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
11396 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11397 RTX_FRAME_RELATED_P (insn) = 1;
11400 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
11401 DRAP is needed and stack realignment is really needed after reload. */
11402 if (stack_realign_drap)
11404 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11406 /* Only need to push the parameter pointer reg if it is a call-preserved register. */
11407 if (!call_used_regs[REGNO (crtl->drap_reg)])
11409 /* Push arg pointer reg */
11410 insn = emit_insn (gen_push (crtl->drap_reg));
11411 RTX_FRAME_RELATED_P (insn) = 1;
11414 /* Grab the argument pointer. */
11415 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11416 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11417 RTX_FRAME_RELATED_P (insn) = 1;
11418 m->fs.cfa_reg = crtl->drap_reg;
11419 m->fs.cfa_offset = 0;
11421 /* Align the stack. */
11422 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11423 stack_pointer_rtx,
11424 GEN_INT (-align_bytes)));
11425 RTX_FRAME_RELATED_P (insn) = 1;
11427 /* Replicate the return address on the stack so that return
11428 address can be reached via (argp - 1) slot. This is needed
11429 to implement macro RETURN_ADDR_RTX and intrinsic function
11430 expand_builtin_return_addr etc. */
11431 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11432 t = gen_frame_mem (word_mode, t);
11433 insn = emit_insn (gen_push (t));
11434 RTX_FRAME_RELATED_P (insn) = 1;
11436 /* For the purposes of frame and register save area addressing,
11437 we've started over with a new frame. */
11438 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11439 m->fs.realigned = true;
11442 int_registers_saved = (frame.nregs == 0);
11443 sse_registers_saved = (frame.nsseregs == 0);
11445 if (frame_pointer_needed && !m->fs.fp_valid)
11447 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11448 slower on all targets. Also sdb doesn't like it. */
11449 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11450 RTX_FRAME_RELATED_P (insn) = 1;
11452 /* Push registers now, before setting the frame pointer
11453 on SEH target. */
11454 if (!int_registers_saved
11455 && TARGET_SEH
11456 && !frame.save_regs_using_mov)
11458 ix86_emit_save_regs ();
11459 int_registers_saved = true;
11460 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11463 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11465 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11466 RTX_FRAME_RELATED_P (insn) = 1;
11468 if (m->fs.cfa_reg == stack_pointer_rtx)
11469 m->fs.cfa_reg = hard_frame_pointer_rtx;
11470 m->fs.fp_offset = m->fs.sp_offset;
11471 m->fs.fp_valid = true;
11475 if (!int_registers_saved)
11477 /* If saving registers via PUSH, do so now. */
11478 if (!frame.save_regs_using_mov)
11480 ix86_emit_save_regs ();
11481 int_registers_saved = true;
11482 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11485 /* When using red zone we may start register saving before allocating
11486 the stack frame saving one cycle of the prologue. However, avoid
11487 doing this if we have to probe the stack; at least on x86_64 the
11488 stack probe can turn into a call that clobbers a red zone location. */
11489 else if (ix86_using_red_zone ()
11490 && (! TARGET_STACK_PROBE
11491 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11493 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11494 int_registers_saved = true;
11498 if (stack_realign_fp)
11500 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11501 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11503 /* The computation of the size of the re-aligned stack frame means
11504 that we must allocate the size of the register save area before
11505 performing the actual alignment. Otherwise we cannot guarantee
11506 that there's enough storage above the realignment point. */
11507 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11508 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11509 GEN_INT (m->fs.sp_offset
11510 - frame.sse_reg_save_offset),
11511 -1, false);
11513 /* Align the stack. */
11514 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11515 stack_pointer_rtx,
11516 GEN_INT (-align_bytes)));
11518 /* For the purposes of register save area addressing, the stack
11519 pointer is no longer valid. As for the value of sp_offset,
11520 see ix86_compute_frame_layout, which we need to match in order
11521 to pass verification of stack_pointer_offset at the end. */
11522 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11523 m->fs.sp_valid = false;
11526 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11528 if (flag_stack_usage_info)
11530 /* We start to count from ARG_POINTER. */
11531 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11533 /* If it was realigned, take into account the fake frame. */
11534 if (stack_realign_drap)
11536 if (ix86_static_chain_on_stack)
11537 stack_size += UNITS_PER_WORD;
11539 if (!call_used_regs[REGNO (crtl->drap_reg)])
11540 stack_size += UNITS_PER_WORD;
11542 /* This over-estimates by 1 minimal-stack-alignment-unit but
11543 mitigates that by counting in the new return address slot. */
11544 current_function_dynamic_stack_size
11545 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11548 current_function_static_stack_size = stack_size;
11551 /* On SEH target with very large frame size, allocate an area to save
11552 SSE registers (as the very large allocation won't be described). */
11553 if (TARGET_SEH
11554 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11555 && !sse_registers_saved)
11557 HOST_WIDE_INT sse_size =
11558 frame.sse_reg_save_offset - frame.reg_save_offset;
11560 gcc_assert (int_registers_saved);
11562 /* No need to do stack checking as the area will be immediately
11563 written. */
11564 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11565 GEN_INT (-sse_size), -1,
11566 m->fs.cfa_reg == stack_pointer_rtx);
11567 allocate -= sse_size;
11568 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11569 sse_registers_saved = true;
11572 /* The stack has already been decremented by the instruction calling us
11573 so probe if the size is non-negative to preserve the protection area. */
11574 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11576 /* We expect the registers to be saved when probes are used. */
11577 gcc_assert (int_registers_saved);
11579 if (STACK_CHECK_MOVING_SP)
11581 if (!(crtl->is_leaf && !cfun->calls_alloca
11582 && allocate <= PROBE_INTERVAL))
11584 ix86_adjust_stack_and_probe (allocate);
11585 allocate = 0;
11588 else
11590 HOST_WIDE_INT size = allocate;
11592 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11593 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11595 if (TARGET_STACK_PROBE)
11597 if (crtl->is_leaf && !cfun->calls_alloca)
11599 if (size > PROBE_INTERVAL)
11600 ix86_emit_probe_stack_range (0, size);
11602 else
11603 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11605 else
11607 if (crtl->is_leaf && !cfun->calls_alloca)
11609 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11610 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11611 size - STACK_CHECK_PROTECT);
11613 else
11614 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11619 if (allocate == 0)
11621 else if (!ix86_target_stack_probe ()
11622 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11624 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11625 GEN_INT (-allocate), -1,
11626 m->fs.cfa_reg == stack_pointer_rtx);
11628 else
11630 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11631 rtx r10 = NULL;
11632 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11633 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11634 bool eax_live = ix86_eax_live_at_start_p ();
11635 bool r10_live = false;
11637 if (TARGET_64BIT)
11638 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11640 if (eax_live)
11642 insn = emit_insn (gen_push (eax));
11643 allocate -= UNITS_PER_WORD;
11644 /* Note that SEH directives need to continue tracking the stack
11645 pointer even after the frame pointer has been set up. */
11646 if (sp_is_cfa_reg || TARGET_SEH)
11648 if (sp_is_cfa_reg)
11649 m->fs.cfa_offset += UNITS_PER_WORD;
11650 RTX_FRAME_RELATED_P (insn) = 1;
11651 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11652 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11653 plus_constant (Pmode, stack_pointer_rtx,
11654 -UNITS_PER_WORD)));
11658 if (r10_live)
11660 r10 = gen_rtx_REG (Pmode, R10_REG);
11661 insn = emit_insn (gen_push (r10));
11662 allocate -= UNITS_PER_WORD;
11663 if (sp_is_cfa_reg || TARGET_SEH)
11665 if (sp_is_cfa_reg)
11666 m->fs.cfa_offset += UNITS_PER_WORD;
11667 RTX_FRAME_RELATED_P (insn) = 1;
11668 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11669 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11670 plus_constant (Pmode, stack_pointer_rtx,
11671 -UNITS_PER_WORD)));
11675 emit_move_insn (eax, GEN_INT (allocate));
11676 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11678 /* Use the fact that AX still contains ALLOCATE. */
11679 adjust_stack_insn = (Pmode == DImode
11680 ? gen_pro_epilogue_adjust_stack_di_sub
11681 : gen_pro_epilogue_adjust_stack_si_sub);
11683 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11684 stack_pointer_rtx, eax));
11686 if (sp_is_cfa_reg || TARGET_SEH)
11688 if (sp_is_cfa_reg)
11689 m->fs.cfa_offset += allocate;
11690 RTX_FRAME_RELATED_P (insn) = 1;
11691 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11692 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11693 plus_constant (Pmode, stack_pointer_rtx,
11694 -allocate)));
11696 m->fs.sp_offset += allocate;
11698 /* Use stack_pointer_rtx for relative addressing so that code
11699 works for realigned stack, too. */
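/* Illustrative note, derived from the pushes above and the fact that AX
   still holds ALLOCATE: when both registers are live, the R10 copy sits at
   SP + ALLOCATE and the EAX copy just above it at SP + ALLOCATE +
   UNITS_PER_WORD; when only one is live, its copy sits at SP + ALLOCATE.  */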
11700 if (r10_live && eax_live)
11702 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11703 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11704 gen_frame_mem (word_mode, t));
11705 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11706 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11707 gen_frame_mem (word_mode, t));
11709 else if (eax_live || r10_live)
11711 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11712 emit_move_insn (gen_rtx_REG (word_mode,
11713 (eax_live ? AX_REG : R10_REG)),
11714 gen_frame_mem (word_mode, t));
11717 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11719 /* If we haven't already set up the frame pointer, do so now. */
11720 if (frame_pointer_needed && !m->fs.fp_valid)
11722 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11723 GEN_INT (frame.stack_pointer_offset
11724 - frame.hard_frame_pointer_offset));
11725 insn = emit_insn (insn);
11726 RTX_FRAME_RELATED_P (insn) = 1;
11727 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11729 if (m->fs.cfa_reg == stack_pointer_rtx)
11730 m->fs.cfa_reg = hard_frame_pointer_rtx;
11731 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11732 m->fs.fp_valid = true;
11735 if (!int_registers_saved)
11736 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11737 if (!sse_registers_saved)
11738 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11740 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
11741 in the prologue. */
11742 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11744 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11745 insn = emit_insn (gen_set_got (pic));
11746 RTX_FRAME_RELATED_P (insn) = 1;
11747 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11748 emit_insn (gen_prologue_use (pic));
11749 /* Delete the already emitted SET_GOT, if it exists and is allocated to
11750 REAL_PIC_OFFSET_TABLE_REGNUM. */
11751 ix86_elim_entry_set_got (pic);
11754 if (crtl->drap_reg && !crtl->stack_realign_needed)
11756 /* vDRAP is set up, but after reload it turns out stack realignment
11757 isn't necessary; here we emit prologue code to set up DRAP
11758 without the stack realignment adjustment. */
11759 t = choose_baseaddr (0);
11760 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11763 /* Prevent instructions from being scheduled into register save push
11764 sequence when access to the redzone area is done through frame pointer.
11765 The offset between the frame pointer and the stack pointer is calculated
11766 relative to the value of the stack pointer at the end of the function
11767 prologue, and moving instructions that access redzone area via frame
11768 pointer inside push sequence violates this assumption. */
11769 if (frame_pointer_needed && frame.red_zone_size)
11770 emit_insn (gen_memory_blockage ());
11772 /* Emit cld instruction if stringops are used in the function. */
11773 if (TARGET_CLD && ix86_current_function_needs_cld)
11774 emit_insn (gen_cld ());
11776 /* SEH requires that the prologue end within 256 bytes of the start of
11777 the function. Prevent instruction schedules that would extend that.
11778 Further, prevent alloca modifications to the stack pointer from being
11779 combined with prologue modifications. */
11780 if (TARGET_SEH)
11781 emit_insn (gen_prologue_use (stack_pointer_rtx));
11784 /* Emit code to restore REG using a POP insn. */
11786 static void
11787 ix86_emit_restore_reg_using_pop (rtx reg)
11789 struct machine_function *m = cfun->machine;
11790 rtx insn = emit_insn (gen_pop (reg));
11792 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11793 m->fs.sp_offset -= UNITS_PER_WORD;
11795 if (m->fs.cfa_reg == crtl->drap_reg
11796 && REGNO (reg) == REGNO (crtl->drap_reg))
11798 /* Previously we'd represented the CFA as an expression
11799 like *(%ebp - 8). We've just popped that value from
11800 the stack, which means we need to reset the CFA to
11801 the drap register. This will remain until we restore
11802 the stack pointer. */
11803 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11804 RTX_FRAME_RELATED_P (insn) = 1;
11806 /* This means that the DRAP register is valid for addressing too. */
11807 m->fs.drap_valid = true;
11808 return;
11811 if (m->fs.cfa_reg == stack_pointer_rtx)
11813 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11814 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11815 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11816 RTX_FRAME_RELATED_P (insn) = 1;
11818 m->fs.cfa_offset -= UNITS_PER_WORD;
11821 /* When the frame pointer is the CFA, and we pop it, we are
11822 swapping back to the stack pointer as the CFA. This happens
11823 for stack frames that don't allocate other data, so we assume
11824 the stack pointer is now pointing at the return address, i.e.
11825 the function entry state, which makes the offset be 1 word. */
11826 if (reg == hard_frame_pointer_rtx)
11828 m->fs.fp_valid = false;
11829 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11831 m->fs.cfa_reg = stack_pointer_rtx;
11832 m->fs.cfa_offset -= UNITS_PER_WORD;
11834 add_reg_note (insn, REG_CFA_DEF_CFA,
11835 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11836 GEN_INT (m->fs.cfa_offset)));
11837 RTX_FRAME_RELATED_P (insn) = 1;
11842 /* Emit code to restore saved registers using POP insns. */
11844 static void
11845 ix86_emit_restore_regs_using_pop (void)
11847 unsigned int regno;
11849 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11850 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11851 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11854 /* Emit code and notes for the LEAVE instruction. */
11856 static void
11857 ix86_emit_leave (void)
11859 struct machine_function *m = cfun->machine;
11860 rtx insn = emit_insn (ix86_gen_leave ());
11862 ix86_add_queued_cfa_restore_notes (insn);
11864 gcc_assert (m->fs.fp_valid);
11865 m->fs.sp_valid = true;
11866 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11867 m->fs.fp_valid = false;
11869 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11871 m->fs.cfa_reg = stack_pointer_rtx;
11872 m->fs.cfa_offset = m->fs.sp_offset;
11874 add_reg_note (insn, REG_CFA_DEF_CFA,
11875 plus_constant (Pmode, stack_pointer_rtx,
11876 m->fs.sp_offset));
11877 RTX_FRAME_RELATED_P (insn) = 1;
11879 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11880 m->fs.fp_offset);
11883 /* Emit code to restore saved registers using MOV insns.
11884 First register is restored from CFA - CFA_OFFSET. */
11885 static void
11886 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11887 bool maybe_eh_return)
11889 struct machine_function *m = cfun->machine;
11890 unsigned int regno;
11892 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11893 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11895 rtx reg = gen_rtx_REG (word_mode, regno);
11896 rtx insn, mem;
11898 mem = choose_baseaddr (cfa_offset);
11899 mem = gen_frame_mem (word_mode, mem);
11900 insn = emit_move_insn (reg, mem);
11902 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11904 /* Previously we'd represented the CFA as an expression
11905 like *(%ebp - 8). We've just popped that value from
11906 the stack, which means we need to reset the CFA to
11907 the drap register. This will remain until we restore
11908 the stack pointer. */
11909 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11910 RTX_FRAME_RELATED_P (insn) = 1;
11912 /* This means that the DRAP register is valid for addressing. */
11913 m->fs.drap_valid = true;
11915 else
11916 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11918 cfa_offset -= UNITS_PER_WORD;
11922 /* Emit code to restore saved SSE registers using MOV insns.
11923 First register is restored from CFA - CFA_OFFSET. */
11924 static void
11925 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11926 bool maybe_eh_return)
11928 unsigned int regno;
11930 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11931 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11933 rtx reg = gen_rtx_REG (V4SFmode, regno);
11934 rtx mem;
11936 mem = choose_baseaddr (cfa_offset);
11937 mem = gen_rtx_MEM (V4SFmode, mem);
11938 set_mem_align (mem, 128);
11939 emit_move_insn (reg, mem);
11941 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11943 cfa_offset -= 16;
11947 /* Restore function stack, frame, and registers. */
11949 void
11950 ix86_expand_epilogue (int style)
11952 struct machine_function *m = cfun->machine;
11953 struct machine_frame_state frame_state_save = m->fs;
11954 struct ix86_frame frame;
11955 bool restore_regs_via_mov;
11956 bool using_drap;
11958 ix86_finalize_stack_realign_flags ();
11959 ix86_compute_frame_layout (&frame);
11961 m->fs.sp_valid = (!frame_pointer_needed
11962 || (crtl->sp_is_unchanging
11963 && !stack_realign_fp));
11964 gcc_assert (!m->fs.sp_valid
11965 || m->fs.sp_offset == frame.stack_pointer_offset);
11967 /* The FP must be valid if the frame pointer is present. */
11968 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11969 gcc_assert (!m->fs.fp_valid
11970 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11972 /* We must have *some* valid pointer to the stack frame. */
11973 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11975 /* The DRAP is never valid at this point. */
11976 gcc_assert (!m->fs.drap_valid);
11978 /* See the comment about red zone and frame
11979 pointer usage in ix86_expand_prologue. */
11980 if (frame_pointer_needed && frame.red_zone_size)
11981 emit_insn (gen_memory_blockage ());
11983 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11984 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11986 /* Determine the CFA offset of the end of the red-zone. */
11987 m->fs.red_zone_offset = 0;
11988 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11990 /* The red-zone begins below the return address. */
11991 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11993 /* When the register save area is in the aligned portion of
11994 the stack, determine the maximum runtime displacement that
11995 matches up with the aligned frame. */
11996 if (stack_realign_drap)
11997 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11998 + UNITS_PER_WORD);
12001 /* Special care must be taken for the normal return case of a function
12002 using eh_return: the eax and edx registers are marked as saved, but
12003 not restored along this path. Adjust the save location to match. */
12004 if (crtl->calls_eh_return && style != 2)
12005 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
12007 /* EH_RETURN requires the use of moves to function properly. */
12008 if (crtl->calls_eh_return)
12009 restore_regs_via_mov = true;
12010 /* SEH requires the use of pops to identify the epilogue. */
12011 else if (TARGET_SEH)
12012 restore_regs_via_mov = false;
12013 /* If we're only restoring one register and sp is not valid, then
12014 use a move instruction to restore the register, since it's
12015 less work than reloading sp and popping the register. */
12016 else if (!m->fs.sp_valid && frame.nregs <= 1)
12017 restore_regs_via_mov = true;
12018 else if (TARGET_EPILOGUE_USING_MOVE
12019 && cfun->machine->use_fast_prologue_epilogue
12020 && (frame.nregs > 1
12021 || m->fs.sp_offset != frame.reg_save_offset))
12022 restore_regs_via_mov = true;
12023 else if (frame_pointer_needed
12024 && !frame.nregs
12025 && m->fs.sp_offset != frame.reg_save_offset)
12026 restore_regs_via_mov = true;
12027 else if (frame_pointer_needed
12028 && TARGET_USE_LEAVE
12029 && cfun->machine->use_fast_prologue_epilogue
12030 && frame.nregs == 1)
12031 restore_regs_via_mov = true;
12032 else
12033 restore_regs_via_mov = false;
12035 if (restore_regs_via_mov || frame.nsseregs)
12037 /* Ensure that the entire register save area is addressable via
12038 the stack pointer, if we will restore via sp. */
12039 if (TARGET_64BIT
12040 && m->fs.sp_offset > 0x7fffffff
12041 && !(m->fs.fp_valid || m->fs.drap_valid)
12042 && (frame.nsseregs + frame.nregs) != 0)
12044 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12045 GEN_INT (m->fs.sp_offset
12046 - frame.sse_reg_save_offset),
12047 style,
12048 m->fs.cfa_reg == stack_pointer_rtx);
12052 /* If there are any SSE registers to restore, then we have to do it
12053 via moves, since there's obviously no pop for SSE regs. */
12054 if (frame.nsseregs)
12055 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
12056 style == 2);
12058 if (restore_regs_via_mov)
12060 rtx t;
12062 if (frame.nregs)
12063 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
12065 /* eh_return epilogues need %ecx added to the stack pointer. */
12066 if (style == 2)
12068 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
12070 /* Stack align doesn't work with eh_return. */
12071 gcc_assert (!stack_realign_drap);
12072 /* Neither do regparm nested functions. */
12073 gcc_assert (!ix86_static_chain_on_stack);
12075 if (frame_pointer_needed)
12077 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
12078 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
12079 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
12081 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
12082 insn = emit_move_insn (hard_frame_pointer_rtx, t);
12084 /* Note that we use SA as a temporary CFA, as the return
12085 address is at the proper place relative to it. We
12086 pretend this happens at the FP restore insn because
12087 prior to this insn the FP would be stored at the wrong
12088 offset relative to SA, and after this insn we have no
12089 other reasonable register to use for the CFA. We don't
12090 bother resetting the CFA to the SP for the duration of
12091 the return insn. */
12092 add_reg_note (insn, REG_CFA_DEF_CFA,
12093 plus_constant (Pmode, sa, UNITS_PER_WORD));
12094 ix86_add_queued_cfa_restore_notes (insn);
12095 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
12096 RTX_FRAME_RELATED_P (insn) = 1;
12098 m->fs.cfa_reg = sa;
12099 m->fs.cfa_offset = UNITS_PER_WORD;
12100 m->fs.fp_valid = false;
12102 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
12103 const0_rtx, style, false);
12105 else
12107 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
12108 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
12109 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
12110 ix86_add_queued_cfa_restore_notes (insn);
12112 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12113 if (m->fs.cfa_offset != UNITS_PER_WORD)
12115 m->fs.cfa_offset = UNITS_PER_WORD;
12116 add_reg_note (insn, REG_CFA_DEF_CFA,
12117 plus_constant (Pmode, stack_pointer_rtx,
12118 UNITS_PER_WORD));
12119 RTX_FRAME_RELATED_P (insn) = 1;
12122 m->fs.sp_offset = UNITS_PER_WORD;
12123 m->fs.sp_valid = true;
12126 else
12128 /* SEH requires that the function end with (1) a stack adjustment
12129 if necessary, (2) a sequence of pops, and (3) a return or
12130 jump instruction. Prevent insns from the function body from
12131 being scheduled into this sequence. */
12132 if (TARGET_SEH)
12134 /* Prevent a catch region from being adjacent to the standard
12135 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
12136 several other flags that would be interesting to test are
12137 set up yet. */
12138 if (flag_non_call_exceptions)
12139 emit_insn (gen_nops (const1_rtx));
12140 else
12141 emit_insn (gen_blockage ());
12144 /* First step is to deallocate the stack frame so that we can
12145 pop the registers. Also do it on SEH target for very large
12146 frame as the emitted instructions aren't allowed by the ABI in
12147 epilogues. */
12148 if (!m->fs.sp_valid
12149 || (TARGET_SEH
12150 && (m->fs.sp_offset - frame.reg_save_offset
12151 >= SEH_MAX_FRAME_SIZE)))
12153 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12154 GEN_INT (m->fs.fp_offset
12155 - frame.reg_save_offset),
12156 style, false);
12158 else if (m->fs.sp_offset != frame.reg_save_offset)
12160 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12161 GEN_INT (m->fs.sp_offset
12162 - frame.reg_save_offset),
12163 style,
12164 m->fs.cfa_reg == stack_pointer_rtx);
12167 ix86_emit_restore_regs_using_pop ();
12170 /* If we used a frame pointer and haven't already got rid of it,
12171 then do so now. */
12172 if (m->fs.fp_valid)
12174 /* If the stack pointer is valid and pointing at the frame
12175 pointer store address, then we only need a pop. */
12176 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12177 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12178 /* Leave results in shorter dependency chains on CPUs that are
12179 able to grok it fast. */
12180 else if (TARGET_USE_LEAVE
12181 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12182 || !cfun->machine->use_fast_prologue_epilogue)
12183 ix86_emit_leave ();
12184 else
12186 pro_epilogue_adjust_stack (stack_pointer_rtx,
12187 hard_frame_pointer_rtx,
12188 const0_rtx, style, !using_drap);
12189 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12193 if (using_drap)
12195 int param_ptr_offset = UNITS_PER_WORD;
12196 rtx insn;
12198 gcc_assert (stack_realign_drap);
12200 if (ix86_static_chain_on_stack)
12201 param_ptr_offset += UNITS_PER_WORD;
12202 if (!call_used_regs[REGNO (crtl->drap_reg)])
12203 param_ptr_offset += UNITS_PER_WORD;
12205 insn = emit_insn (gen_rtx_SET
12206 (VOIDmode, stack_pointer_rtx,
12207 gen_rtx_PLUS (Pmode,
12208 crtl->drap_reg,
12209 GEN_INT (-param_ptr_offset))));
12210 m->fs.cfa_reg = stack_pointer_rtx;
12211 m->fs.cfa_offset = param_ptr_offset;
12212 m->fs.sp_offset = param_ptr_offset;
12213 m->fs.realigned = false;
12215 add_reg_note (insn, REG_CFA_DEF_CFA,
12216 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12217 GEN_INT (param_ptr_offset)));
12218 RTX_FRAME_RELATED_P (insn) = 1;
12220 if (!call_used_regs[REGNO (crtl->drap_reg)])
12221 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12224 /* At this point the stack pointer must be valid, and we must have
12225 restored all of the registers. We may not have deallocated the
12226 entire stack frame. We've delayed this until now because it may
12227 be possible to merge the local stack deallocation with the
12228 deallocation forced by ix86_static_chain_on_stack. */
12229 gcc_assert (m->fs.sp_valid);
12230 gcc_assert (!m->fs.fp_valid);
12231 gcc_assert (!m->fs.realigned);
12232 if (m->fs.sp_offset != UNITS_PER_WORD)
12234 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12235 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12236 style, true);
12238 else
12239 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12241 /* Sibcall epilogues don't want a return instruction. */
12242 if (style == 0)
12244 m->fs = frame_state_save;
12245 return;
12248 if (crtl->args.pops_args && crtl->args.size)
12250 rtx popc = GEN_INT (crtl->args.pops_args);
12252 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
12253 address, do an explicit add, and jump indirectly to the caller. */
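/* For illustration, with a hypothetical pops_args of 0x20000 the code
   below emits, roughly:
	popl  %ecx
	addl  $0x20000, %esp
	jmp   *%ecx
   i.e. the return address is popped into %ecx, the oversized argument
   area is released with an explicit add, and control returns to the
   caller through an indirect jump.  */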
12255 if (crtl->args.pops_args >= 65536)
12257 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12258 rtx insn;
12260 /* There is no "pascal" calling convention in any 64bit ABI. */
12261 gcc_assert (!TARGET_64BIT);
12263 insn = emit_insn (gen_pop (ecx));
12264 m->fs.cfa_offset -= UNITS_PER_WORD;
12265 m->fs.sp_offset -= UNITS_PER_WORD;
12267 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12268 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
12269 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12270 add_reg_note (insn, REG_CFA_REGISTER,
12271 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
12272 RTX_FRAME_RELATED_P (insn) = 1;
12274 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12275 popc, -1, true);
12276 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12278 else
12279 emit_jump_insn (gen_simple_return_pop_internal (popc));
12281 else
12282 emit_jump_insn (gen_simple_return_internal ());
12284 /* Restore the state back to the state from the prologue,
12285 so that it's correct for the next epilogue. */
12286 m->fs = frame_state_save;
12289 /* Reset from the function's potential modifications. */
12291 static void
12292 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12294 if (pic_offset_table_rtx
12295 && !ix86_use_pseudo_pic_reg ())
12296 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12297 #if TARGET_MACHO
12298 /* Mach-O doesn't support labels at the end of objects, so if
12299 it looks like we might want one, insert a NOP. */
12301 rtx_insn *insn = get_last_insn ();
12302 rtx_insn *deleted_debug_label = NULL;
12303 while (insn
12304 && NOTE_P (insn)
12305 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12307 /* If there are only NOTE_INSN_DELETED_DEBUG_LABEL notes,
12308 don't insert a nop; instead set their CODE_LABEL_NUMBER to -1,
12309 otherwise there would be code generation differences
12310 between -g and -g0.
12311 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12312 deleted_debug_label = insn;
12313 insn = PREV_INSN (insn);
12315 if (insn
12316 && (LABEL_P (insn)
12317 || (NOTE_P (insn)
12318 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12319 fputs ("\tnop\n", file);
12320 else if (deleted_debug_label)
12321 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12322 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12323 CODE_LABEL_NUMBER (insn) = -1;
12325 #endif
12329 /* Return a scratch register to use in the split stack prologue. The
12330 split stack prologue is used for -fsplit-stack. These are the first
12331 instructions in the function, even before the regular prologue.
12332 The scratch register can be any caller-saved register which is not
12333 used for parameters or for the static chain. */
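/* A rough summary of the choices made below: 64-bit targets always use
   %r11.  On 32-bit targets, fastcall functions use %eax (a static chain
   is not supported), thiscall functions use %edx, or %eax when a static
   chain is present, and otherwise %ecx is used, falling back to %edx
   when a static chain occupies %ecx and at most one register parameter
   is used; if no suitable register remains, INVALID_REGNUM is returned
   after issuing a sorry () diagnostic.  */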
12335 static unsigned int
12336 split_stack_prologue_scratch_regno (void)
12338 if (TARGET_64BIT)
12339 return R11_REG;
12340 else
12342 bool is_fastcall, is_thiscall;
12343 int regparm;
12345 is_fastcall = (lookup_attribute ("fastcall",
12346 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12347 != NULL);
12348 is_thiscall = (lookup_attribute ("thiscall",
12349 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12350 != NULL);
12351 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12353 if (is_fastcall)
12355 if (DECL_STATIC_CHAIN (cfun->decl))
12357 sorry ("-fsplit-stack does not support fastcall with "
12358 "nested function");
12359 return INVALID_REGNUM;
12361 return AX_REG;
12363 else if (is_thiscall)
12365 if (!DECL_STATIC_CHAIN (cfun->decl))
12366 return DX_REG;
12367 return AX_REG;
12369 else if (regparm < 3)
12371 if (!DECL_STATIC_CHAIN (cfun->decl))
12372 return CX_REG;
12373 else
12375 if (regparm >= 2)
12377 sorry ("-fsplit-stack does not support 2 register "
12378 "parameters for a nested function");
12379 return INVALID_REGNUM;
12381 return DX_REG;
12384 else
12386 /* FIXME: We could make this work by pushing a register
12387 around the addition and comparison. */
12388 sorry ("-fsplit-stack does not support 3 register parameters");
12389 return INVALID_REGNUM;
12394 /* A SYMBOL_REF for the function which allocates new stack space for
12395 -fsplit-stack. */
12397 static GTY(()) rtx split_stack_fn;
12399 /* A SYMBOL_REF for the __morestack_large_model function used with
12400 the large code model. */
12402 static GTY(()) rtx split_stack_fn_large;
12404 /* Handle -fsplit-stack. These are the first instructions in the
12405 function, even before the regular prologue. */
12407 void
12408 ix86_expand_split_stack_prologue (void)
12410 struct ix86_frame frame;
12411 HOST_WIDE_INT allocate;
12412 unsigned HOST_WIDE_INT args_size;
12413 rtx_code_label *label;
12414 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12415 rtx scratch_reg = NULL_RTX;
12416 rtx_code_label *varargs_label = NULL;
12417 rtx fn;
12419 gcc_assert (flag_split_stack && reload_completed);
12421 ix86_finalize_stack_realign_flags ();
12422 ix86_compute_frame_layout (&frame);
12423 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12425 /* This is the label we will branch to if we have enough stack
12426 space. We expect the basic block reordering pass to reverse this
12427 branch if optimizing, so that we branch in the unlikely case. */
12428 label = gen_label_rtx ();
12430 /* We need to compare the stack pointer minus the frame size with
12431 the stack boundary in the TCB. The stack boundary always gives
12432 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12433 can compare directly. Otherwise we need to do an addition. */
12435 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12436 UNSPEC_STACK_CHECK);
12437 limit = gen_rtx_CONST (Pmode, limit);
12438 limit = gen_rtx_MEM (Pmode, limit);
12439 if (allocate < SPLIT_STACK_AVAILABLE)
12440 current = stack_pointer_rtx;
12441 else
12443 unsigned int scratch_regno;
12444 rtx offset;
12446 /* We need a scratch register to hold the stack pointer minus
12447 the required frame size. Since this is the very start of the
12448 function, the scratch register can be any caller-saved
12449 register which is not used for parameters. */
12450 offset = GEN_INT (- allocate);
12451 scratch_regno = split_stack_prologue_scratch_regno ();
12452 if (scratch_regno == INVALID_REGNUM)
12453 return;
12454 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12455 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12457 /* We don't use ix86_gen_add3 in this case because it will
12458 want to split to lea, but when not optimizing the insn
12459 will not be split after this point. */
12460 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12461 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12462 offset)));
12464 else
12466 emit_move_insn (scratch_reg, offset);
12467 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12468 stack_pointer_rtx));
12470 current = scratch_reg;
12473 ix86_expand_branch (GEU, current, limit, label);
12474 jump_insn = get_last_insn ();
12475 JUMP_LABEL (jump_insn) = label;
12477 /* Mark the jump as very likely to be taken. */
12478 add_int_reg_note (jump_insn, REG_BR_PROB,
12479 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12481 if (split_stack_fn == NULL_RTX)
12483 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12484 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12486 fn = split_stack_fn;
12488 /* Get more stack space. We pass in the desired stack space and the
12489 size of the arguments to copy to the new stack. In 32-bit mode
12490 we push the parameters; __morestack will return on a new stack
12491 anyhow. In 64-bit mode we pass the parameters in r10 and
12492 r11. */
12493 allocate_rtx = GEN_INT (allocate);
12494 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12495 call_fusage = NULL_RTX;
12496 if (TARGET_64BIT)
12498 rtx reg10, reg11;
12500 reg10 = gen_rtx_REG (Pmode, R10_REG);
12501 reg11 = gen_rtx_REG (Pmode, R11_REG);
12503 /* If this function uses a static chain, it will be in %r10.
12504 Preserve it across the call to __morestack. */
12505 if (DECL_STATIC_CHAIN (cfun->decl))
12507 rtx rax;
12509 rax = gen_rtx_REG (word_mode, AX_REG);
12510 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12511 use_reg (&call_fusage, rax);
12514 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12515 && !TARGET_PECOFF)
12517 HOST_WIDE_INT argval;
12519 gcc_assert (Pmode == DImode);
12520 /* When using the large model we need to load the address
12521 into a register, and we've run out of registers. So we
12522 switch to a different calling convention, and we call a
12523 different function: __morestack_large_model. We pass the
12524 argument size in the upper 32 bits of r10 and pass the
12525 frame size in the lower 32 bits. */
12526 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12527 gcc_assert ((args_size & 0xffffffff) == args_size);
12529 if (split_stack_fn_large == NULL_RTX)
12531 split_stack_fn_large =
12532 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12533 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12535 if (ix86_cmodel == CM_LARGE_PIC)
12537 rtx_code_label *label;
12538 rtx x;
12540 label = gen_label_rtx ();
12541 emit_label (label);
12542 LABEL_PRESERVE_P (label) = 1;
12543 emit_insn (gen_set_rip_rex64 (reg10, label));
12544 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12545 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12546 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12547 UNSPEC_GOT);
12548 x = gen_rtx_CONST (Pmode, x);
12549 emit_move_insn (reg11, x);
12550 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12551 x = gen_const_mem (Pmode, x);
12552 emit_move_insn (reg11, x);
12554 else
12555 emit_move_insn (reg11, split_stack_fn_large);
12557 fn = reg11;
12559 argval = ((args_size << 16) << 16) + allocate;
12560 emit_move_insn (reg10, GEN_INT (argval));
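	  /* For example, with a hypothetical args_size of 0x40 and an
	     allocate of 0x2000, argval is 0x0000004000002000: the argument
	     size ends up in the upper 32 bits of %r10 and the frame size in
	     the lower 32 bits, as described above.  */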
12562 else
12564 emit_move_insn (reg10, allocate_rtx);
12565 emit_move_insn (reg11, GEN_INT (args_size));
12566 use_reg (&call_fusage, reg11);
12569 use_reg (&call_fusage, reg10);
12571 else
12573 emit_insn (gen_push (GEN_INT (args_size)));
12574 emit_insn (gen_push (allocate_rtx));
12576 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12577 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12578 NULL_RTX, false);
12579 add_function_usage_to (call_insn, call_fusage);
12581 /* In order to make call/return prediction work right, we now need
12582 to execute a return instruction. See
12583 libgcc/config/i386/morestack.S for the details on how this works.
12585 For flow purposes gcc must not see this as a return
12586 instruction--we need control flow to continue at the subsequent
12587 label. Therefore, we use an unspec. */
12588 gcc_assert (crtl->args.pops_args < 65536);
12589 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12591 /* If we are in 64-bit mode and this function uses a static chain,
12592 we saved %r10 in %rax before calling __morestack.
12593 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12594 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12595 gen_rtx_REG (word_mode, AX_REG));
12597 /* If this function calls va_start, we need to store a pointer to
12598 the arguments on the old stack, because they may not have been
12599 all copied to the new stack. At this point the old stack can be
12600 found at the frame pointer value used by __morestack, because
12601 __morestack has set that up before calling back to us. Here we
12602 store that pointer in a scratch register, and in
12603 ix86_expand_prologue we store the scratch register in a stack
12604 slot. */
12605 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12607 unsigned int scratch_regno;
12608 rtx frame_reg;
12609 int words;
12611 scratch_regno = split_stack_prologue_scratch_regno ();
12612 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12613 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12615 /* 64-bit:
12616 fp -> old fp value
12617 return address within this function
12618 return address of caller of this function
12619 stack arguments
12620 So we add three words to get to the stack arguments.
12622 32-bit:
12623 fp -> old fp value
12624 return address within this function
12625 first argument to __morestack
12626 second argument to __morestack
12627 return address of caller of this function
12628 stack arguments
12629 So we add five words to get to the stack arguments.
12631 words = TARGET_64BIT ? 3 : 5;
12632 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12633 gen_rtx_PLUS (Pmode, frame_reg,
12634 GEN_INT (words * UNITS_PER_WORD))));
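	  /* Concretely: on 64-bit targets (UNITS_PER_WORD == 8) this sets
	     scratch_reg = fp + 24, skipping the three words listed above;
	     on 32-bit targets (UNITS_PER_WORD == 4) it sets
	     scratch_reg = fp + 20, skipping five words.  */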
12636 varargs_label = gen_label_rtx ();
12637 emit_jump_insn (gen_jump (varargs_label));
12638 JUMP_LABEL (get_last_insn ()) = varargs_label;
12640 emit_barrier ();
12643 emit_label (label);
12644 LABEL_NUSES (label) = 1;
12646 /* If this function calls va_start, we now have to set the scratch
12647 register for the case where we do not call __morestack. In this
12648 case we need to set it based on the stack pointer. */
12649 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12651 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12652 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12653 GEN_INT (UNITS_PER_WORD))));
12655 emit_label (varargs_label);
12656 LABEL_NUSES (varargs_label) = 1;
12660 /* We may have to tell the dataflow pass that the split stack prologue
12661 is initializing a scratch register. */
12663 static void
12664 ix86_live_on_entry (bitmap regs)
12666 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12668 gcc_assert (flag_split_stack);
12669 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12673 /* Extract the parts of an RTL expression that is a valid memory address
12674 for an instruction. Return 0 if the structure of the address is
12675 grossly off. Return -1 if the address contains ASHIFT, so it is not
12676 strictly valid, but still used for computing the length of an lea instruction. */
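/* For example, the address
       (plus:SI (plus:SI (mult:SI (reg:SI %eax) (const_int 4))
			 (reg:SI %ebx))
		(const_int 16))
   decomposes into base == %ebx, index == %eax, scale == 4 and disp == 16,
   i.e. the x86 addressing form 16(%ebx,%eax,4).  */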
12678 static int
12679 ix86_decompose_address (rtx addr, struct ix86_address *out)
12681 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12682 rtx base_reg, index_reg;
12683 HOST_WIDE_INT scale = 1;
12684 rtx scale_rtx = NULL_RTX;
12685 rtx tmp;
12686 int retval = 1;
12687 enum ix86_address_seg seg = SEG_DEFAULT;
12689 /* Allow zero-extended SImode addresses;
12690 they will be emitted with the addr32 prefix. */
12691 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12693 if (GET_CODE (addr) == ZERO_EXTEND
12694 && GET_MODE (XEXP (addr, 0)) == SImode)
12696 addr = XEXP (addr, 0);
12697 if (CONST_INT_P (addr))
12698 return 0;
12700 else if (GET_CODE (addr) == AND
12701 && const_32bit_mask (XEXP (addr, 1), DImode))
12703 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12704 if (addr == NULL_RTX)
12705 return 0;
12707 if (CONST_INT_P (addr))
12708 return 0;
12712 /* Allow SImode subregs of DImode addresses;
12713 they will be emitted with the addr32 prefix. */
12714 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12716 if (GET_CODE (addr) == SUBREG
12717 && GET_MODE (SUBREG_REG (addr)) == DImode)
12719 addr = SUBREG_REG (addr);
12720 if (CONST_INT_P (addr))
12721 return 0;
12725 if (REG_P (addr))
12726 base = addr;
12727 else if (GET_CODE (addr) == SUBREG)
12729 if (REG_P (SUBREG_REG (addr)))
12730 base = addr;
12731 else
12732 return 0;
12734 else if (GET_CODE (addr) == PLUS)
12736 rtx addends[4], op;
12737 int n = 0, i;
12739 op = addr;
12742 if (n >= 4)
12743 return 0;
12744 addends[n++] = XEXP (op, 1);
12745 op = XEXP (op, 0);
12747 while (GET_CODE (op) == PLUS);
12748 if (n >= 4)
12749 return 0;
12750 addends[n] = op;
12752 for (i = n; i >= 0; --i)
12754 op = addends[i];
12755 switch (GET_CODE (op))
12757 case MULT:
12758 if (index)
12759 return 0;
12760 index = XEXP (op, 0);
12761 scale_rtx = XEXP (op, 1);
12762 break;
12764 case ASHIFT:
12765 if (index)
12766 return 0;
12767 index = XEXP (op, 0);
12768 tmp = XEXP (op, 1);
12769 if (!CONST_INT_P (tmp))
12770 return 0;
12771 scale = INTVAL (tmp);
12772 if ((unsigned HOST_WIDE_INT) scale > 3)
12773 return 0;
12774 scale = 1 << scale;
12775 break;
12777 case ZERO_EXTEND:
12778 op = XEXP (op, 0);
12779 if (GET_CODE (op) != UNSPEC)
12780 return 0;
12781 /* FALLTHRU */
12783 case UNSPEC:
12784 if (XINT (op, 1) == UNSPEC_TP
12785 && TARGET_TLS_DIRECT_SEG_REFS
12786 && seg == SEG_DEFAULT)
12787 seg = DEFAULT_TLS_SEG_REG;
12788 else
12789 return 0;
12790 break;
12792 case SUBREG:
12793 if (!REG_P (SUBREG_REG (op)))
12794 return 0;
12795 /* FALLTHRU */
12797 case REG:
12798 if (!base)
12799 base = op;
12800 else if (!index)
12801 index = op;
12802 else
12803 return 0;
12804 break;
12806 case CONST:
12807 case CONST_INT:
12808 case SYMBOL_REF:
12809 case LABEL_REF:
12810 if (disp)
12811 return 0;
12812 disp = op;
12813 break;
12815 default:
12816 return 0;
12820 else if (GET_CODE (addr) == MULT)
12822 index = XEXP (addr, 0); /* index*scale */
12823 scale_rtx = XEXP (addr, 1);
12825 else if (GET_CODE (addr) == ASHIFT)
12827 /* We're called for lea too, which implements ashift on occasion. */
12828 index = XEXP (addr, 0);
12829 tmp = XEXP (addr, 1);
12830 if (!CONST_INT_P (tmp))
12831 return 0;
12832 scale = INTVAL (tmp);
12833 if ((unsigned HOST_WIDE_INT) scale > 3)
12834 return 0;
12835 scale = 1 << scale;
12836 retval = -1;
12838 else
12839 disp = addr; /* displacement */
12841 if (index)
12843 if (REG_P (index))
12845 else if (GET_CODE (index) == SUBREG
12846 && REG_P (SUBREG_REG (index)))
12848 else
12849 return 0;
12852 /* Extract the integral value of scale. */
12853 if (scale_rtx)
12855 if (!CONST_INT_P (scale_rtx))
12856 return 0;
12857 scale = INTVAL (scale_rtx);
12860 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12861 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12863 /* Avoid useless 0 displacement. */
12864 if (disp == const0_rtx && (base || index))
12865 disp = NULL_RTX;
12867 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12868 if (base_reg && index_reg && scale == 1
12869 && (index_reg == arg_pointer_rtx
12870 || index_reg == frame_pointer_rtx
12871 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12873 std::swap (base, index);
12874 std::swap (base_reg, index_reg);
12877 /* Special case: %ebp cannot be encoded as a base without a displacement.
12878 Similarly %r13. */
12879 if (!disp
12880 && base_reg
12881 && (base_reg == hard_frame_pointer_rtx
12882 || base_reg == frame_pointer_rtx
12883 || base_reg == arg_pointer_rtx
12884 || (REG_P (base_reg)
12885 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12886 || REGNO (base_reg) == R13_REG))))
12887 disp = const0_rtx;
12889 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12890 Avoid this by transforming to [%esi+0].
12891 Reload calls address legitimization without cfun defined, so we need
12892 to test cfun for being non-NULL. */
12893 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12894 && base_reg && !index_reg && !disp
12895 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12896 disp = const0_rtx;
12898 /* Special case: encode reg+reg instead of reg*2. */
12899 if (!base && index && scale == 2)
12900 base = index, base_reg = index_reg, scale = 1;
12902 /* Special case: scaling cannot be encoded without base or displacement. */
12903 if (!base && !disp && index && scale != 1)
12904 disp = const0_rtx;
12906 out->base = base;
12907 out->index = index;
12908 out->disp = disp;
12909 out->scale = scale;
12910 out->seg = seg;
12912 return retval;
12915 /* Return cost of the memory address x.
12916 For i386, it is better to use a complex address than let gcc copy
12917 the address into a reg and make a new pseudo. But not if the address
12918 requires two regs - that would mean more pseudos with longer
12919 lifetimes. */
12920 static int
12921 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
12923 struct ix86_address parts;
12924 int cost = 1;
12925 int ok = ix86_decompose_address (x, &parts);
12927 gcc_assert (ok);
12929 if (parts.base && GET_CODE (parts.base) == SUBREG)
12930 parts.base = SUBREG_REG (parts.base);
12931 if (parts.index && GET_CODE (parts.index) == SUBREG)
12932 parts.index = SUBREG_REG (parts.index);
12934 /* Attempt to minimize the number of registers in the address by increasing
12935 the address cost for each register used. We don't increase the address cost
12936 for "pic_offset_table_rtx". When a memop using "pic_offset_table_rtx"
12937 is not invariant itself, it most likely means that the base or index is not
12938 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
12939 which is not profitable for x86. */
12940 if (parts.base
12941 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12942 && (current_pass->type == GIMPLE_PASS
12943 || !pic_offset_table_rtx
12944 || !REG_P (parts.base)
12945 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
12946 cost++;
12948 if (parts.index
12949 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12950 && (current_pass->type == GIMPLE_PASS
12951 || !pic_offset_table_rtx
12952 || !REG_P (parts.index)
12953 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
12954 cost++;
12956 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12957 since its predecode logic can't detect the length of such instructions
12958 and it degenerates to vector decoding. Increase the cost of such
12959 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12960 to split such addresses or even refuse such addresses at all.
12962 The following addressing modes are affected:
12963 [base+scale*index]
12964 [scale*index+disp]
12965 [base+index]
12967 The first and last cases may be avoidable by explicitly coding the zero into
12968 the memory address, but I don't have an AMD-K6 machine handy to check this
12969 theory. */
12971 if (TARGET_K6
12972 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12973 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12974 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12975 cost += 10;
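  /* For instance, an address of the form (%ebx,%ecx,2) -- base plus
     scaled index and no displacement -- matches the first case above and
     takes the penalty, while 4(%ebx,%ecx,2) does not.  */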
12977 return cost;
12980 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12981 this is used to form addresses to local data when -fPIC is in
12982 use. */
12984 static bool
12985 darwin_local_data_pic (rtx disp)
12987 return (GET_CODE (disp) == UNSPEC
12988 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12991 /* Determine if a given RTX is a valid constant. We already know this
12992 satisfies CONSTANT_P. */
12994 static bool
12995 ix86_legitimate_constant_p (machine_mode, rtx x)
12997 /* Pointer bounds constants are not valid. */
12998 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
12999 return false;
13001 switch (GET_CODE (x))
13003 case CONST:
13004 x = XEXP (x, 0);
13006 if (GET_CODE (x) == PLUS)
13008 if (!CONST_INT_P (XEXP (x, 1)))
13009 return false;
13010 x = XEXP (x, 0);
13013 if (TARGET_MACHO && darwin_local_data_pic (x))
13014 return true;
13016 /* Only some unspecs are valid as "constants". */
13017 if (GET_CODE (x) == UNSPEC)
13018 switch (XINT (x, 1))
13020 case UNSPEC_GOT:
13021 case UNSPEC_GOTOFF:
13022 case UNSPEC_PLTOFF:
13023 return TARGET_64BIT;
13024 case UNSPEC_TPOFF:
13025 case UNSPEC_NTPOFF:
13026 x = XVECEXP (x, 0, 0);
13027 return (GET_CODE (x) == SYMBOL_REF
13028 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13029 case UNSPEC_DTPOFF:
13030 x = XVECEXP (x, 0, 0);
13031 return (GET_CODE (x) == SYMBOL_REF
13032 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
13033 default:
13034 return false;
13037 /* We must have drilled down to a symbol. */
13038 if (GET_CODE (x) == LABEL_REF)
13039 return true;
13040 if (GET_CODE (x) != SYMBOL_REF)
13041 return false;
13042 /* FALLTHRU */
13044 case SYMBOL_REF:
13045 /* TLS symbols are never valid. */
13046 if (SYMBOL_REF_TLS_MODEL (x))
13047 return false;
13049 /* DLLIMPORT symbols are never valid. */
13050 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13051 && SYMBOL_REF_DLLIMPORT_P (x))
13052 return false;
13054 #if TARGET_MACHO
13055 /* mdynamic-no-pic */
13056 if (MACHO_DYNAMIC_NO_PIC_P)
13057 return machopic_symbol_defined_p (x);
13058 #endif
13059 break;
13061 case CONST_DOUBLE:
13062 if (GET_MODE (x) == TImode
13063 && x != CONST0_RTX (TImode)
13064 && !TARGET_64BIT)
13065 return false;
13066 break;
13068 case CONST_VECTOR:
13069 if (!standard_sse_constant_p (x))
13070 return false;
13072 default:
13073 break;
13076 /* Otherwise we handle everything else in the move patterns. */
13077 return true;
13080 /* Determine if it's legal to put X into the constant pool. This
13081 is not possible for the address of thread-local symbols, which
13082 is checked above. */
13084 static bool
13085 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
13087 /* We can always put integral constants and vectors in memory. */
13088 switch (GET_CODE (x))
13090 case CONST_INT:
13091 case CONST_DOUBLE:
13092 case CONST_VECTOR:
13093 return false;
13095 default:
13096 break;
13098 return !ix86_legitimate_constant_p (mode, x);
13101 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
13102 otherwise zero. */
13104 static bool
13105 is_imported_p (rtx x)
13107 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
13108 || GET_CODE (x) != SYMBOL_REF)
13109 return false;
13111 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13115 /* Nonzero if the constant value X is a legitimate general operand
13116 when generating PIC code. It is given that flag_pic is on and
13117 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
13119 bool
13120 legitimate_pic_operand_p (rtx x)
13122 rtx inner;
13124 switch (GET_CODE (x))
13126 case CONST:
13127 inner = XEXP (x, 0);
13128 if (GET_CODE (inner) == PLUS
13129 && CONST_INT_P (XEXP (inner, 1)))
13130 inner = XEXP (inner, 0);
13132 /* Only some unspecs are valid as "constants". */
13133 if (GET_CODE (inner) == UNSPEC)
13134 switch (XINT (inner, 1))
13136 case UNSPEC_GOT:
13137 case UNSPEC_GOTOFF:
13138 case UNSPEC_PLTOFF:
13139 return TARGET_64BIT;
13140 case UNSPEC_TPOFF:
13141 x = XVECEXP (inner, 0, 0);
13142 return (GET_CODE (x) == SYMBOL_REF
13143 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13144 case UNSPEC_MACHOPIC_OFFSET:
13145 return legitimate_pic_address_disp_p (x);
13146 default:
13147 return false;
13149 /* FALLTHRU */
13151 case SYMBOL_REF:
13152 case LABEL_REF:
13153 return legitimate_pic_address_disp_p (x);
13155 default:
13156 return true;
13160 /* Determine if a given CONST RTX is a valid memory displacement
13161 in PIC mode. */
13163 bool
13164 legitimate_pic_address_disp_p (rtx disp)
13166 bool saw_plus;
13168 /* In 64bit mode we can allow direct addresses of symbols and labels
13169 when they are not dynamic symbols. */
13170 if (TARGET_64BIT)
13172 rtx op0 = disp, op1;
13174 switch (GET_CODE (disp))
13176 case LABEL_REF:
13177 return true;
13179 case CONST:
13180 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13181 break;
13182 op0 = XEXP (XEXP (disp, 0), 0);
13183 op1 = XEXP (XEXP (disp, 0), 1);
13184 if (!CONST_INT_P (op1)
13185 || INTVAL (op1) >= 16*1024*1024
13186 || INTVAL (op1) < -16*1024*1024)
13187 break;
13188 if (GET_CODE (op0) == LABEL_REF)
13189 return true;
13190 if (GET_CODE (op0) == CONST
13191 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13192 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13193 return true;
13194 if (GET_CODE (op0) == UNSPEC
13195 && XINT (op0, 1) == UNSPEC_PCREL)
13196 return true;
13197 if (GET_CODE (op0) != SYMBOL_REF)
13198 break;
13199 /* FALLTHRU */
13201 case SYMBOL_REF:
13202 /* TLS references should always be enclosed in UNSPEC.
13203 A dllimported symbol always needs to be resolved.
13204 if (SYMBOL_REF_TLS_MODEL (op0)
13205 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13206 return false;
13208 if (TARGET_PECOFF)
13210 if (is_imported_p (op0))
13211 return true;
13213 if (SYMBOL_REF_FAR_ADDR_P (op0)
13214 || !SYMBOL_REF_LOCAL_P (op0))
13215 break;
13217 /* Function symbols need to be resolved only for the
13218 large model.
13219 For the small model we don't need to resolve anything
13220 here.
13221 if ((ix86_cmodel != CM_LARGE_PIC
13222 && SYMBOL_REF_FUNCTION_P (op0))
13223 || ix86_cmodel == CM_SMALL_PIC)
13224 return true;
13225 /* Non-external symbols don't need to be resolved for
13226 the large and medium models.
13227 if ((ix86_cmodel == CM_LARGE_PIC
13228 || ix86_cmodel == CM_MEDIUM_PIC)
13229 && !SYMBOL_REF_EXTERNAL_P (op0))
13230 return true;
13232 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13233 && (SYMBOL_REF_LOCAL_P (op0)
13234 || (HAVE_LD_PIE_COPYRELOC
13235 && flag_pie
13236 && !SYMBOL_REF_WEAK (op0)
13237 && !SYMBOL_REF_FUNCTION_P (op0)))
13238 && ix86_cmodel != CM_LARGE_PIC)
13239 return true;
13240 break;
13242 default:
13243 break;
13246 if (GET_CODE (disp) != CONST)
13247 return false;
13248 disp = XEXP (disp, 0);
13250 if (TARGET_64BIT)
13252 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
13253 of GOT references. We should not need these anyway. */
13254 if (GET_CODE (disp) != UNSPEC
13255 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13256 && XINT (disp, 1) != UNSPEC_GOTOFF
13257 && XINT (disp, 1) != UNSPEC_PCREL
13258 && XINT (disp, 1) != UNSPEC_PLTOFF))
13259 return false;
13261 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13262 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13263 return false;
13264 return true;
13267 saw_plus = false;
13268 if (GET_CODE (disp) == PLUS)
13270 if (!CONST_INT_P (XEXP (disp, 1)))
13271 return false;
13272 disp = XEXP (disp, 0);
13273 saw_plus = true;
13276 if (TARGET_MACHO && darwin_local_data_pic (disp))
13277 return true;
13279 if (GET_CODE (disp) != UNSPEC)
13280 return false;
13282 switch (XINT (disp, 1))
13284 case UNSPEC_GOT:
13285 if (saw_plus)
13286 return false;
13287 /* We need to check for both symbols and labels because VxWorks loads
13288 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13289 details. */
13290 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13291 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13292 case UNSPEC_GOTOFF:
13293 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13294 While the ABI also specifies a 32bit relocation, we don't produce it in
13295 the small PIC model at all.
13296 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13297 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13298 && !TARGET_64BIT)
13299 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13300 return false;
13301 case UNSPEC_GOTTPOFF:
13302 case UNSPEC_GOTNTPOFF:
13303 case UNSPEC_INDNTPOFF:
13304 if (saw_plus)
13305 return false;
13306 disp = XVECEXP (disp, 0, 0);
13307 return (GET_CODE (disp) == SYMBOL_REF
13308 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13309 case UNSPEC_NTPOFF:
13310 disp = XVECEXP (disp, 0, 0);
13311 return (GET_CODE (disp) == SYMBOL_REF
13312 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13313 case UNSPEC_DTPOFF:
13314 disp = XVECEXP (disp, 0, 0);
13315 return (GET_CODE (disp) == SYMBOL_REF
13316 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13319 return false;
13322 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
13323 replace the input X, or the original X if no replacement is called for.
13324 The output parameter *WIN is 1 if the calling macro should goto WIN,
13325 0 if it should not. */
13327 bool
13328 ix86_legitimize_reload_address (rtx x, machine_mode, int opnum, int type,
13329 int)
13331 /* Reload can generate:
13333 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
13334 (reg:DI 97))
13335 (reg:DI 2 cx))
13337 This RTX is rejected by ix86_legitimate_address_p due to
13338 non-strictness of base register 97. Following this rejection,
13339 reload pushes all three components into separate registers,
13340 creating invalid memory address RTX.
13342 The following code reloads only the invalid part of the
13343 memory address RTX. */
13345 if (GET_CODE (x) == PLUS
13346 && REG_P (XEXP (x, 1))
13347 && GET_CODE (XEXP (x, 0)) == PLUS
13348 && REG_P (XEXP (XEXP (x, 0), 1)))
13350 rtx base, index;
13351 bool something_reloaded = false;
13353 base = XEXP (XEXP (x, 0), 1);
13354 if (!REG_OK_FOR_BASE_STRICT_P (base))
13356 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
13357 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13358 opnum, (enum reload_type) type);
13359 something_reloaded = true;
13362 index = XEXP (x, 1);
13363 if (!REG_OK_FOR_INDEX_STRICT_P (index))
13365 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
13366 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13367 opnum, (enum reload_type) type);
13368 something_reloaded = true;
13371 gcc_assert (something_reloaded);
13372 return true;
13375 return false;
13378 /* Determine if OP is a suitable RTX for an address register.
13379 Return the naked register if a register or a register subreg is
13380 found; otherwise return NULL_RTX. */
13382 static rtx
13383 ix86_validate_address_register (rtx op)
13385 machine_mode mode = GET_MODE (op);
13387 /* Only SImode or DImode registers can form the address. */
13388 if (mode != SImode && mode != DImode)
13389 return NULL_RTX;
13391 if (REG_P (op))
13392 return op;
13393 else if (GET_CODE (op) == SUBREG)
13395 rtx reg = SUBREG_REG (op);
13397 if (!REG_P (reg))
13398 return NULL_RTX;
13400 mode = GET_MODE (reg);
13402 /* Don't allow SUBREGs that span more than a word. It can
13403 lead to spill failures when the register is one word out
13404 of a two word structure. */
13405 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13406 return NULL_RTX;
13408 /* Allow only SUBREGs of non-eliminable hard registers. */
13409 if (register_no_elim_operand (reg, mode))
13410 return reg;
13413 /* Op is not a register. */
13414 return NULL_RTX;
13417 /* Recognizes RTL expressions that are valid memory addresses for an
13418 instruction. The MODE argument is the machine mode for the MEM
13419 expression that wants to use this address.
13421 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13422 convert common non-canonical forms to canonical form so that they will
13423 be recognized. */
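/* Examples of the checks below: a scale factor of 3, an address mixing a
   DImode base with an SImode index, or a segment-prefixed address whose
   register is not word_mode are all rejected, while a canonical form such
   as disp(%base,%index,4) with a legitimate displacement is accepted.  */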
13425 static bool
13426 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13428 struct ix86_address parts;
13429 rtx base, index, disp;
13430 HOST_WIDE_INT scale;
13431 enum ix86_address_seg seg;
13433 if (ix86_decompose_address (addr, &parts) <= 0)
13434 /* Decomposition failed. */
13435 return false;
13437 base = parts.base;
13438 index = parts.index;
13439 disp = parts.disp;
13440 scale = parts.scale;
13441 seg = parts.seg;
13443 /* Validate base register. */
13444 if (base)
13446 rtx reg = ix86_validate_address_register (base);
13448 if (reg == NULL_RTX)
13449 return false;
13451 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13452 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13453 /* Base is not valid. */
13454 return false;
13457 /* Validate index register. */
13458 if (index)
13460 rtx reg = ix86_validate_address_register (index);
13462 if (reg == NULL_RTX)
13463 return false;
13465 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13466 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13467 /* Index is not valid. */
13468 return false;
13471 /* Index and base should have the same mode. */
13472 if (base && index
13473 && GET_MODE (base) != GET_MODE (index))
13474 return false;
13476 /* Address override works only on the (%reg) part of %fs:(%reg). */
13477 if (seg != SEG_DEFAULT
13478 && ((base && GET_MODE (base) != word_mode)
13479 || (index && GET_MODE (index) != word_mode)))
13480 return false;
13482 /* Validate scale factor. */
13483 if (scale != 1)
13485 if (!index)
13486 /* Scale without index. */
13487 return false;
13489 if (scale != 2 && scale != 4 && scale != 8)
13490 /* Scale is not a valid multiplier. */
13491 return false;
13494 /* Validate displacement. */
13495 if (disp)
13497 if (GET_CODE (disp) == CONST
13498 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13499 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13500 switch (XINT (XEXP (disp, 0), 1))
13502 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13503 used. While the ABI also specifies 32bit relocations, we don't produce
13504 them at all and use IP-relative addressing instead.
13505 case UNSPEC_GOT:
13506 case UNSPEC_GOTOFF:
13507 gcc_assert (flag_pic);
13508 if (!TARGET_64BIT)
13509 goto is_legitimate_pic;
13511 /* 64bit address unspec. */
13512 return false;
13514 case UNSPEC_GOTPCREL:
13515 case UNSPEC_PCREL:
13516 gcc_assert (flag_pic);
13517 goto is_legitimate_pic;
13519 case UNSPEC_GOTTPOFF:
13520 case UNSPEC_GOTNTPOFF:
13521 case UNSPEC_INDNTPOFF:
13522 case UNSPEC_NTPOFF:
13523 case UNSPEC_DTPOFF:
13524 break;
13526 case UNSPEC_STACK_CHECK:
13527 gcc_assert (flag_split_stack);
13528 break;
13530 default:
13531 /* Invalid address unspec. */
13532 return false;
13535 else if (SYMBOLIC_CONST (disp)
13536 && (flag_pic
13537 || (TARGET_MACHO
13538 #if TARGET_MACHO
13539 && MACHOPIC_INDIRECT
13540 && !machopic_operand_p (disp)
13541 #endif
13545 is_legitimate_pic:
13546 if (TARGET_64BIT && (index || base))
13548 /* foo@dtpoff(%rX) is ok. */
13549 if (GET_CODE (disp) != CONST
13550 || GET_CODE (XEXP (disp, 0)) != PLUS
13551 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13552 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13553 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13554 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13555 /* Non-constant pic memory reference. */
13556 return false;
13558 else if ((!TARGET_MACHO || flag_pic)
13559 && ! legitimate_pic_address_disp_p (disp))
13560 /* Displacement is an invalid pic construct. */
13561 return false;
13562 #if TARGET_MACHO
13563 else if (MACHO_DYNAMIC_NO_PIC_P
13564 && !ix86_legitimate_constant_p (Pmode, disp))
13565 /* displacement must be referenced via non_lazy_pointer */
13566 return false;
13567 #endif
13569 /* This code used to verify that a symbolic pic displacement
13570 includes the pic_offset_table_rtx register.
13572 While this is a good idea, unfortunately these constructs may
13573 be created by the "adds using lea" optimization for incorrect
13574 code like:
13576 int a;
13577 int foo(int i)
13579 return *(&a+i);
13582 This code is nonsensical, but results in addressing the
13583 GOT table with a pic_offset_table_rtx base. We can't
13584 just refuse it easily, since it gets matched by the
13585 "addsi3" pattern, which later gets split to lea when the
13586 output register differs from the input. While this
13587 could be handled by a separate addsi pattern for this case
13588 that never results in lea, disabling this test seems to be
13589 the easier and correct fix for the crash.
13591 else if (GET_CODE (disp) != LABEL_REF
13592 && !CONST_INT_P (disp)
13593 && (GET_CODE (disp) != CONST
13594 || !ix86_legitimate_constant_p (Pmode, disp))
13595 && (GET_CODE (disp) != SYMBOL_REF
13596 || !ix86_legitimate_constant_p (Pmode, disp)))
13597 /* Displacement is not constant. */
13598 return false;
13599 else if (TARGET_64BIT
13600 && !x86_64_immediate_operand (disp, VOIDmode))
13601 /* Displacement is out of range. */
13602 return false;
13603 /* In x32 mode, constant addresses are sign extended to 64bit, so
13604 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13605 else if (TARGET_X32 && !(index || base)
13606 && CONST_INT_P (disp)
13607 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13608 return false;
13611 /* Everything looks valid. */
13612 return true;
13615 /* Determine if a given RTX is a valid constant address. */
13617 bool
13618 constant_address_p (rtx x)
13620 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13623 /* Return a unique alias set for the GOT. */
13625 static alias_set_type
13626 ix86_GOT_alias_set (void)
13628 static alias_set_type set = -1;
13629 if (set == -1)
13630 set = new_alias_set ();
13631 return set;
13634 /* Set regs_ever_live for PIC base address register
13635 to true if required. */
13636 static void
13637 set_pic_reg_ever_live ()
13639 if (reload_in_progress)
13640 df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
13643 /* Return a legitimate reference for ORIG (an address) using the
13644 register REG. If REG is 0, a new pseudo is generated.
13646 There are two types of references that must be handled:
13648 1. Global data references must load the address from the GOT, via
13649 the PIC reg. An insn is emitted to do this load, and the reg is
13650 returned.
13652 2. Static data references, constant pool addresses, and code labels
13653 compute the address as an offset from the GOT, whose base is in
13654 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13655 differentiate them from global data objects. The returned
13656 address is the PIC reg + an unspec constant.
13658 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13659 reg also appears in the address. */
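/* As an illustration of the two cases above in 32-bit PIC code: a global
   symbol FOO becomes the load
       (mem (plus pic_offset_table_rtx
		  (const (unspec [FOO] UNSPEC_GOT))))
   while a local symbol BAR becomes the address
       (plus pic_offset_table_rtx (const (unspec [BAR] UNSPEC_GOTOFF)))
   i.e. an @GOT load versus an @GOTOFF offset from the PIC register.  */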
13661 static rtx
13662 legitimize_pic_address (rtx orig, rtx reg)
13664 rtx addr = orig;
13665 rtx new_rtx = orig;
13667 #if TARGET_MACHO
13668 if (TARGET_MACHO && !TARGET_64BIT)
13670 if (reg == 0)
13671 reg = gen_reg_rtx (Pmode);
13672 /* Use the generic Mach-O PIC machinery. */
13673 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13675 #endif
13677 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13679 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13680 if (tmp)
13681 return tmp;
13684 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13685 new_rtx = addr;
13686 else if (TARGET_64BIT && !TARGET_PECOFF
13687 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13689 rtx tmpreg;
13690 /* This symbol may be referenced via a displacement from the PIC
13691 base address (@GOTOFF). */
13693 set_pic_reg_ever_live ();
13694 if (GET_CODE (addr) == CONST)
13695 addr = XEXP (addr, 0);
13696 if (GET_CODE (addr) == PLUS)
13698 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13699 UNSPEC_GOTOFF);
13700 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13702 else
13703 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13704 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13705 if (!reg)
13706 tmpreg = gen_reg_rtx (Pmode);
13707 else
13708 tmpreg = reg;
13709 emit_move_insn (tmpreg, new_rtx);
13711 if (reg != 0)
13713 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13714 tmpreg, 1, OPTAB_DIRECT);
13715 new_rtx = reg;
13717 else
13718 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13720 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13722 /* This symbol may be referenced via a displacement from the PIC
13723 base address (@GOTOFF). */
13725 set_pic_reg_ever_live ();
13726 if (GET_CODE (addr) == CONST)
13727 addr = XEXP (addr, 0);
13728 if (GET_CODE (addr) == PLUS)
13730 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13731 UNSPEC_GOTOFF);
13732 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13734 else
13735 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13736 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13737 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13739 if (reg != 0)
13741 emit_move_insn (reg, new_rtx);
13742 new_rtx = reg;
13745 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13746 /* We can't use @GOTOFF for text labels on VxWorks;
13747 see gotoff_operand. */
13748 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13750 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13751 if (tmp)
13752 return tmp;
13754 /* For x64 PE-COFF there is no GOT table. So we use the address
13755 directly. */
13756 if (TARGET_64BIT && TARGET_PECOFF)
13758 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13759 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13761 if (reg == 0)
13762 reg = gen_reg_rtx (Pmode);
13763 emit_move_insn (reg, new_rtx);
13764 new_rtx = reg;
13766 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13768 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13769 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13770 new_rtx = gen_const_mem (Pmode, new_rtx);
13771 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13773 if (reg == 0)
13774 reg = gen_reg_rtx (Pmode);
13775 /* Use gen_movsi directly; otherwise the address is loaded
13776 into a register for CSE. We don't want to CSE these addresses;
13777 instead we CSE addresses from the GOT table, so skip this. */
13778 emit_insn (gen_movsi (reg, new_rtx));
13779 new_rtx = reg;
13781 else
13783 /* This symbol must be referenced via a load from the
13784 Global Offset Table (@GOT). */
13786 set_pic_reg_ever_live ();
13787 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13788 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13789 if (TARGET_64BIT)
13790 new_rtx = force_reg (Pmode, new_rtx);
13791 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13792 new_rtx = gen_const_mem (Pmode, new_rtx);
13793 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13795 if (reg == 0)
13796 reg = gen_reg_rtx (Pmode);
13797 emit_move_insn (reg, new_rtx);
13798 new_rtx = reg;
13801 else
13803 if (CONST_INT_P (addr)
13804 && !x86_64_immediate_operand (addr, VOIDmode))
13806 if (reg)
13808 emit_move_insn (reg, addr);
13809 new_rtx = reg;
13811 else
13812 new_rtx = force_reg (Pmode, addr);
13814 else if (GET_CODE (addr) == CONST)
13816 addr = XEXP (addr, 0);
13818 /* We must match stuff we generate before. Assume the only
13819 unspecs that can get here are ours. Not that we could do
13820 anything with them anyway.... */
13821 if (GET_CODE (addr) == UNSPEC
13822 || (GET_CODE (addr) == PLUS
13823 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13824 return orig;
13825 gcc_assert (GET_CODE (addr) == PLUS);
13827 if (GET_CODE (addr) == PLUS)
13829 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13831 /* Check first to see if this is a constant offset from a @GOTOFF
13832 symbol reference. */
13833 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13834 && CONST_INT_P (op1))
13836 if (!TARGET_64BIT)
13838 set_pic_reg_ever_live ();
13839 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13840 UNSPEC_GOTOFF);
13841 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13842 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13843 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13845 if (reg != 0)
13847 emit_move_insn (reg, new_rtx);
13848 new_rtx = reg;
13851 else
13853 if (INTVAL (op1) < -16*1024*1024
13854 || INTVAL (op1) >= 16*1024*1024)
13856 if (!x86_64_immediate_operand (op1, Pmode))
13857 op1 = force_reg (Pmode, op1);
13858 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13862 else
13864 rtx base = legitimize_pic_address (op0, reg);
13865 machine_mode mode = GET_MODE (base);
13866 new_rtx
13867 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13869 if (CONST_INT_P (new_rtx))
13871 if (INTVAL (new_rtx) < -16*1024*1024
13872 || INTVAL (new_rtx) >= 16*1024*1024)
13874 if (!x86_64_immediate_operand (new_rtx, mode))
13875 new_rtx = force_reg (mode, new_rtx);
13876 new_rtx
13877 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13879 else
13880 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13882 else
13884 /* For %rip addressing, we have to use just disp32, with
13885 neither base nor index. */
13886 if (TARGET_64BIT
13887 && (GET_CODE (base) == SYMBOL_REF
13888 || GET_CODE (base) == LABEL_REF))
13889 base = force_reg (mode, base);
13890 if (GET_CODE (new_rtx) == PLUS
13891 && CONSTANT_P (XEXP (new_rtx, 1)))
13893 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13894 new_rtx = XEXP (new_rtx, 1);
13896 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13901 return new_rtx;
13904 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13906 static rtx
13907 get_thread_pointer (machine_mode tp_mode, bool to_reg)
13909 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13911 if (GET_MODE (tp) != tp_mode)
13913 gcc_assert (GET_MODE (tp) == SImode);
13914 gcc_assert (tp_mode == DImode);
13916 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13919 if (to_reg)
13920 tp = copy_to_mode_reg (tp_mode, tp);
13922 return tp;
13925 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13927 static GTY(()) rtx ix86_tls_symbol;
13929 static rtx
13930 ix86_tls_get_addr (void)
13932 if (!ix86_tls_symbol)
13934 const char *sym
13935 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13936 ? "___tls_get_addr" : "__tls_get_addr");
13938 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13941 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13943 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13944 UNSPEC_PLTOFF);
13945 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13946 gen_rtx_CONST (Pmode, unspec));
13949 return ix86_tls_symbol;
13952 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13954 static GTY(()) rtx ix86_tls_module_base_symbol;
13956 static rtx
13957 ix86_tls_module_base (void)
13959 if (!ix86_tls_module_base_symbol)
13961 ix86_tls_module_base_symbol
13962 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13964 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13965 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13968 return ix86_tls_module_base_symbol;
13971 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13972 false if we expect this to be used for a memory address and true if
13973 we expect to load the address into a register. */
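/* As a rough guide to the models handled below: the local exec model
   builds a thread-pointer-relative constant such as
       (plus (unspec [(const_int 0)] UNSPEC_TP)
	     (const (unspec [x] UNSPEC_NTPOFF)))
   the initial exec model first loads the offset from the GOT, and the
   dynamic models call __tls_get_addr (or use the GNU2 descriptor
   sequence when TARGET_GNU2_TLS).  */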
13975 static rtx
13976 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13978 rtx dest, base, off;
13979 rtx pic = NULL_RTX, tp = NULL_RTX;
13980 machine_mode tp_mode = Pmode;
13981 int type;
13983 /* Fall back to the global dynamic model if the tool chain cannot support
13984 local dynamic. */
13985 if (TARGET_SUN_TLS && !TARGET_64BIT
13986 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13987 && model == TLS_MODEL_LOCAL_DYNAMIC)
13988 model = TLS_MODEL_GLOBAL_DYNAMIC;
13990 switch (model)
13992 case TLS_MODEL_GLOBAL_DYNAMIC:
13993 dest = gen_reg_rtx (Pmode);
13995 if (!TARGET_64BIT)
13997 if (flag_pic && !TARGET_PECOFF)
13998 pic = pic_offset_table_rtx;
13999 else
14001 pic = gen_reg_rtx (Pmode);
14002 emit_insn (gen_set_got (pic));
14006 if (TARGET_GNU2_TLS)
14008 if (TARGET_64BIT)
14009 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
14010 else
14011 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
14013 tp = get_thread_pointer (Pmode, true);
14014 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
14016 if (GET_MODE (x) != Pmode)
14017 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14019 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14021 else
14023 rtx caddr = ix86_tls_get_addr ();
14025 if (TARGET_64BIT)
14027 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14028 rtx_insn *insns;
14030 start_sequence ();
14031 emit_call_insn
14032 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
14033 insns = get_insns ();
14034 end_sequence ();
14036 if (GET_MODE (x) != Pmode)
14037 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14039 RTL_CONST_CALL_P (insns) = 1;
14040 emit_libcall_block (insns, dest, rax, x);
14042 else
14043 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
14045 break;
14047 case TLS_MODEL_LOCAL_DYNAMIC:
14048 base = gen_reg_rtx (Pmode);
14050 if (!TARGET_64BIT)
14052 if (flag_pic)
14053 pic = pic_offset_table_rtx;
14054 else
14056 pic = gen_reg_rtx (Pmode);
14057 emit_insn (gen_set_got (pic));
14061 if (TARGET_GNU2_TLS)
14063 rtx tmp = ix86_tls_module_base ();
14065 if (TARGET_64BIT)
14066 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
14067 else
14068 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
14070 tp = get_thread_pointer (Pmode, true);
14071 set_unique_reg_note (get_last_insn (), REG_EQUAL,
14072 gen_rtx_MINUS (Pmode, tmp, tp));
14074 else
14076 rtx caddr = ix86_tls_get_addr ();
14078 if (TARGET_64BIT)
14080 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14081 rtx_insn *insns;
14082 rtx eqv;
14084 start_sequence ();
14085 emit_call_insn
14086 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
14087 insns = get_insns ();
14088 end_sequence ();
14090 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
14091 share the LD_BASE result with other LD model accesses. */
14092 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
14093 UNSPEC_TLS_LD_BASE);
14095 RTL_CONST_CALL_P (insns) = 1;
14096 emit_libcall_block (insns, base, rax, eqv);
14098 else
14099 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
14102 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
14103 off = gen_rtx_CONST (Pmode, off);
14105 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
14107 if (TARGET_GNU2_TLS)
14109 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
14111 if (GET_MODE (x) != Pmode)
14112 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14114 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14116 break;
14118 case TLS_MODEL_INITIAL_EXEC:
14119 if (TARGET_64BIT)
14121 if (TARGET_SUN_TLS && !TARGET_X32)
14123 /* The Sun linker took the AMD64 TLS spec literally
14124 and can only handle %rax as the destination of the
14125 initial exec code sequence.
14127 dest = gen_reg_rtx (DImode);
14128 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14129 return dest;
14132 /* Generate DImode references to avoid %fs:(%reg32)
14133 problems and linker IE->LE relaxation bug. */
14134 tp_mode = DImode;
14135 pic = NULL;
14136 type = UNSPEC_GOTNTPOFF;
14138 else if (flag_pic)
14140 set_pic_reg_ever_live ();
14141 pic = pic_offset_table_rtx;
14142 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14144 else if (!TARGET_ANY_GNU_TLS)
14146 pic = gen_reg_rtx (Pmode);
14147 emit_insn (gen_set_got (pic));
14148 type = UNSPEC_GOTTPOFF;
14150 else
14152 pic = NULL;
14153 type = UNSPEC_INDNTPOFF;
14156 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14157 off = gen_rtx_CONST (tp_mode, off);
14158 if (pic)
14159 off = gen_rtx_PLUS (tp_mode, pic, off);
14160 off = gen_const_mem (tp_mode, off);
14161 set_mem_alias_set (off, ix86_GOT_alias_set ());
14163 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14165 base = get_thread_pointer (tp_mode,
14166 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14167 off = force_reg (tp_mode, off);
14168 return gen_rtx_PLUS (tp_mode, base, off);
14170 else
14172 base = get_thread_pointer (Pmode, true);
14173 dest = gen_reg_rtx (Pmode);
14174 emit_insn (ix86_gen_sub3 (dest, base, off));
14176 break;
14178 case TLS_MODEL_LOCAL_EXEC:
14179 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14180 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14181 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14182 off = gen_rtx_CONST (Pmode, off);
14184 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14186 base = get_thread_pointer (Pmode,
14187 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14188 return gen_rtx_PLUS (Pmode, base, off);
14190 else
14192 base = get_thread_pointer (Pmode, true);
14193 dest = gen_reg_rtx (Pmode);
14194 emit_insn (ix86_gen_sub3 (dest, base, off));
14196 break;
14198 default:
14199 gcc_unreachable ();
14202 return dest;
14205 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14206 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14207 unique refptr-DECL symbol corresponding to symbol DECL. */
14209 struct dllimport_hasher : ggc_cache_hasher<tree_map *>
14211 static inline hashval_t hash (tree_map *m) { return m->hash; }
14212 static inline bool
14213 equal (tree_map *a, tree_map *b)
14215 return a->base.from == b->base.from;
14218 static void
14219 handle_cache_entry (tree_map *&m)
14221 extern void gt_ggc_mx (tree_map *&);
14222 if (m == HTAB_EMPTY_ENTRY || m == HTAB_DELETED_ENTRY)
14223 return;
14224 else if (ggc_marked_p (m->base.from))
14225 gt_ggc_mx (m);
14226 else
14227 m = static_cast<tree_map *> (HTAB_DELETED_ENTRY);
14231 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14233 static tree
14234 get_dllimport_decl (tree decl, bool beimport)
14236 struct tree_map *h, in;
14237 const char *name;
14238 const char *prefix;
14239 size_t namelen, prefixlen;
14240 char *imp_name;
14241 tree to;
14242 rtx rtl;
14244 if (!dllimport_map)
14245 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14247 in.hash = htab_hash_pointer (decl);
14248 in.base.from = decl;
14249 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14250 h = *loc;
14251 if (h)
14252 return h->to;
14254 *loc = h = ggc_alloc<tree_map> ();
14255 h->hash = in.hash;
14256 h->base.from = decl;
14257 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14258 VAR_DECL, NULL, ptr_type_node);
14259 DECL_ARTIFICIAL (to) = 1;
14260 DECL_IGNORED_P (to) = 1;
14261 DECL_EXTERNAL (to) = 1;
14262 TREE_READONLY (to) = 1;
14264 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14265 name = targetm.strip_name_encoding (name);
14266 if (beimport)
14267 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14268 ? "*__imp_" : "*__imp__";
14269 else
14270 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14271 namelen = strlen (name);
14272 prefixlen = strlen (prefix);
14273 imp_name = (char *) alloca (namelen + prefixlen + 1);
14274 memcpy (imp_name, prefix, prefixlen);
14275 memcpy (imp_name + prefixlen, name, namelen + 1);
14277 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14278 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14279 SET_SYMBOL_REF_DECL (rtl, to);
14280 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14281 if (!beimport)
14283 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14284 #ifdef SUB_TARGET_RECORD_STUB
14285 SUB_TARGET_RECORD_STUB (name);
14286 #endif
14289 rtl = gen_const_mem (Pmode, rtl);
14290 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14292 SET_DECL_RTL (to, rtl);
14293 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14295 return to;
14298 /* Expand SYMBOL into its corresponding far-addressed symbol.
14299 WANT_REG is true if we require the result be a register. */
14301 static rtx
14302 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14304 tree imp_decl;
14305 rtx x;
14307 gcc_assert (SYMBOL_REF_DECL (symbol));
14308 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14310 x = DECL_RTL (imp_decl);
14311 if (want_reg)
14312 x = force_reg (Pmode, x);
14313 return x;
14316 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14317 true if we require the result be a register. */
14319 static rtx
14320 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14322 tree imp_decl;
14323 rtx x;
14325 gcc_assert (SYMBOL_REF_DECL (symbol));
14326 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14328 x = DECL_RTL (imp_decl);
14329 if (want_reg)
14330 x = force_reg (Pmode, x);
14331 return x;
14334 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
14335 is true if we require the result be a register. */
14337 static rtx
14338 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14340 if (!TARGET_PECOFF)
14341 return NULL_RTX;
14343 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14345 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14346 return legitimize_dllimport_symbol (addr, inreg);
14347 if (GET_CODE (addr) == CONST
14348 && GET_CODE (XEXP (addr, 0)) == PLUS
14349 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14350 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14352 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14353 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14357 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14358 return NULL_RTX;
14359 if (GET_CODE (addr) == SYMBOL_REF
14360 && !is_imported_p (addr)
14361 && SYMBOL_REF_EXTERNAL_P (addr)
14362 && SYMBOL_REF_DECL (addr))
14363 return legitimize_pe_coff_extern_decl (addr, inreg);
14365 if (GET_CODE (addr) == CONST
14366 && GET_CODE (XEXP (addr, 0)) == PLUS
14367 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14368 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14369 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14370 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14372 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14373 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14375 return NULL_RTX;
14378 /* Try machine-dependent ways of modifying an illegitimate address
14379 to be legitimate. If we find one, return the new, valid address.
14380 This macro is used in only one place: `memory_address' in explow.c.
14382 OLDX is the address as it was before break_out_memory_refs was called.
14383 In some cases it is useful to look at this to decide what needs to be done.
14385 It is always safe for this macro to do nothing. It exists to recognize
14386 opportunities to optimize the output.
14388 For the 80386, we handle X+REG by loading X into a register R and
14389 using R+REG. R will go in a general reg and indexing will be used.
14390 However, if REG is a broken-out memory address or multiplication,
14391 nothing needs to be done because REG can certainly go in a general reg.
14393 When -fpic is used, special handling is needed for symbolic references.
14394 See comments by legitimize_pic_address in i386.c for details. */
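/* An illustrative sketch, not part of the original comment: an address
   such as (plus (symbol_ref "x") (reg R)) may have "x" forced into a
   fresh register so the sum becomes register+register, and a small
   shift like (ashift (reg R) (const_int 2)) is rewritten below as
   (mult (reg R) (const_int 4)) so it can serve as a scaled index.  */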
14396 static rtx
14397 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14399 bool changed = false;
14400 unsigned log;
14402 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14403 if (log)
14404 return legitimize_tls_address (x, (enum tls_model) log, false);
14405 if (GET_CODE (x) == CONST
14406 && GET_CODE (XEXP (x, 0)) == PLUS
14407 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14408 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14410 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14411 (enum tls_model) log, false);
14412 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14415 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14417 rtx tmp = legitimize_pe_coff_symbol (x, true);
14418 if (tmp)
14419 return tmp;
14422 if (flag_pic && SYMBOLIC_CONST (x))
14423 return legitimize_pic_address (x, 0);
14425 #if TARGET_MACHO
14426 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14427 return machopic_indirect_data_reference (x, 0);
14428 #endif
14430 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14431 if (GET_CODE (x) == ASHIFT
14432 && CONST_INT_P (XEXP (x, 1))
14433 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14435 changed = true;
14436 log = INTVAL (XEXP (x, 1));
14437 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14438 GEN_INT (1 << log));
14441 if (GET_CODE (x) == PLUS)
14443 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14445 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14446 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14447 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14449 changed = true;
14450 log = INTVAL (XEXP (XEXP (x, 0), 1));
14451 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14452 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14453 GEN_INT (1 << log));
14456 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14457 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14458 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14460 changed = true;
14461 log = INTVAL (XEXP (XEXP (x, 1), 1));
14462 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14463 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14464 GEN_INT (1 << log));
14467 /* Put multiply first if it isn't already. */
14468 if (GET_CODE (XEXP (x, 1)) == MULT)
14470 std::swap (XEXP (x, 0), XEXP (x, 1));
14471 changed = true;
14474 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14475 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14476 created by virtual register instantiation, register elimination, and
14477 similar optimizations. */
14478 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14480 changed = true;
14481 x = gen_rtx_PLUS (Pmode,
14482 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14483 XEXP (XEXP (x, 1), 0)),
14484 XEXP (XEXP (x, 1), 1));
14487 /* Canonicalize
14488 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14489 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14490 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14491 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14492 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14493 && CONSTANT_P (XEXP (x, 1)))
14495 rtx constant;
14496 rtx other = NULL_RTX;
14498 if (CONST_INT_P (XEXP (x, 1)))
14500 constant = XEXP (x, 1);
14501 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14503 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14505 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14506 other = XEXP (x, 1);
14508 else
14509 constant = 0;
14511 if (constant)
14513 changed = true;
14514 x = gen_rtx_PLUS (Pmode,
14515 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14516 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14517 plus_constant (Pmode, other,
14518 INTVAL (constant)));
14522 if (changed && ix86_legitimate_address_p (mode, x, false))
14523 return x;
14525 if (GET_CODE (XEXP (x, 0)) == MULT)
14527 changed = true;
14528 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14531 if (GET_CODE (XEXP (x, 1)) == MULT)
14533 changed = true;
14534 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14537 if (changed
14538 && REG_P (XEXP (x, 1))
14539 && REG_P (XEXP (x, 0)))
14540 return x;
14542 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14544 changed = true;
14545 x = legitimize_pic_address (x, 0);
14548 if (changed && ix86_legitimate_address_p (mode, x, false))
14549 return x;
14551 if (REG_P (XEXP (x, 0)))
14553 rtx temp = gen_reg_rtx (Pmode);
14554 rtx val = force_operand (XEXP (x, 1), temp);
14555 if (val != temp)
14557 val = convert_to_mode (Pmode, val, 1);
14558 emit_move_insn (temp, val);
14561 XEXP (x, 1) = temp;
14562 return x;
14565 else if (REG_P (XEXP (x, 1)))
14567 rtx temp = gen_reg_rtx (Pmode);
14568 rtx val = force_operand (XEXP (x, 0), temp);
14569 if (val != temp)
14571 val = convert_to_mode (Pmode, val, 1);
14572 emit_move_insn (temp, val);
14575 XEXP (x, 0) = temp;
14576 return x;
14580 return x;
14583 /* Print an integer constant expression in assembler syntax. Addition
14584 and subtraction are the only arithmetic that may appear in these
14585 expressions. FILE is the stdio stream to write to, X is the rtx, and
14586 CODE is the operand print code from the output string. */
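/* An illustrative example, inferred from the code below rather than
   taken from the original comment: (const (plus (symbol_ref "foo")
   (const_int 4))) prints as "foo+4", while a GOT-relative reference
   wrapped in UNSPEC_GOTOFF prints as "foo@GOTOFF".  */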
14588 static void
14589 output_pic_addr_const (FILE *file, rtx x, int code)
14591 char buf[256];
14593 switch (GET_CODE (x))
14595 case PC:
14596 gcc_assert (flag_pic);
14597 putc ('.', file);
14598 break;
14600 case SYMBOL_REF:
14601 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14602 output_addr_const (file, x);
14603 else
14605 const char *name = XSTR (x, 0);
14607 /* Mark the decl as referenced so that cgraph will
14608 output the function. */
14609 if (SYMBOL_REF_DECL (x))
14610 mark_decl_referenced (SYMBOL_REF_DECL (x));
14612 #if TARGET_MACHO
14613 if (MACHOPIC_INDIRECT
14614 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14615 name = machopic_indirection_name (x, /*stub_p=*/true);
14616 #endif
14617 assemble_name (file, name);
14619 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14620 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14621 fputs ("@PLT", file);
14622 break;
14624 case LABEL_REF:
14625 x = XEXP (x, 0);
14626 /* FALLTHRU */
14627 case CODE_LABEL:
14628 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14629 assemble_name (asm_out_file, buf);
14630 break;
14632 case CONST_INT:
14633 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14634 break;
14636 case CONST:
14637 /* This used to output parentheses around the expression,
14638 but that does not work on the 386 (either ATT or BSD assembler). */
14639 output_pic_addr_const (file, XEXP (x, 0), code);
14640 break;
14642 case CONST_DOUBLE:
14643 if (GET_MODE (x) == VOIDmode)
14645 /* We can use %d if the number is <32 bits and positive. */
14646 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14647 fprintf (file, "0x%lx%08lx",
14648 (unsigned long) CONST_DOUBLE_HIGH (x),
14649 (unsigned long) CONST_DOUBLE_LOW (x));
14650 else
14651 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14653 else
14654 /* We can't handle floating point constants;
14655 TARGET_PRINT_OPERAND must handle them. */
14656 output_operand_lossage ("floating constant misused");
14657 break;
14659 case PLUS:
14660 /* Some assemblers need integer constants to appear first. */
14661 if (CONST_INT_P (XEXP (x, 0)))
14663 output_pic_addr_const (file, XEXP (x, 0), code);
14664 putc ('+', file);
14665 output_pic_addr_const (file, XEXP (x, 1), code);
14667 else
14669 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14670 output_pic_addr_const (file, XEXP (x, 1), code);
14671 putc ('+', file);
14672 output_pic_addr_const (file, XEXP (x, 0), code);
14674 break;
14676 case MINUS:
14677 if (!TARGET_MACHO)
14678 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14679 output_pic_addr_const (file, XEXP (x, 0), code);
14680 putc ('-', file);
14681 output_pic_addr_const (file, XEXP (x, 1), code);
14682 if (!TARGET_MACHO)
14683 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14684 break;
14686 case UNSPEC:
14687 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14689 bool f = i386_asm_output_addr_const_extra (file, x);
14690 gcc_assert (f);
14691 break;
14694 gcc_assert (XVECLEN (x, 0) == 1);
14695 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14696 switch (XINT (x, 1))
14698 case UNSPEC_GOT:
14699 fputs ("@GOT", file);
14700 break;
14701 case UNSPEC_GOTOFF:
14702 fputs ("@GOTOFF", file);
14703 break;
14704 case UNSPEC_PLTOFF:
14705 fputs ("@PLTOFF", file);
14706 break;
14707 case UNSPEC_PCREL:
14708 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14709 "(%rip)" : "[rip]", file);
14710 break;
14711 case UNSPEC_GOTPCREL:
14712 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14713 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14714 break;
14715 case UNSPEC_GOTTPOFF:
14716 /* FIXME: This might be @TPOFF in Sun ld too. */
14717 fputs ("@gottpoff", file);
14718 break;
14719 case UNSPEC_TPOFF:
14720 fputs ("@tpoff", file);
14721 break;
14722 case UNSPEC_NTPOFF:
14723 if (TARGET_64BIT)
14724 fputs ("@tpoff", file);
14725 else
14726 fputs ("@ntpoff", file);
14727 break;
14728 case UNSPEC_DTPOFF:
14729 fputs ("@dtpoff", file);
14730 break;
14731 case UNSPEC_GOTNTPOFF:
14732 if (TARGET_64BIT)
14733 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14734 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14735 else
14736 fputs ("@gotntpoff", file);
14737 break;
14738 case UNSPEC_INDNTPOFF:
14739 fputs ("@indntpoff", file);
14740 break;
14741 #if TARGET_MACHO
14742 case UNSPEC_MACHOPIC_OFFSET:
14743 putc ('-', file);
14744 machopic_output_function_base_name (file);
14745 break;
14746 #endif
14747 default:
14748 output_operand_lossage ("invalid UNSPEC as operand");
14749 break;
14751 break;
14753 default:
14754 output_operand_lossage ("invalid expression as operand");
14758 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14759 We need to emit DTP-relative relocations. */
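/* An illustrative example, inferred from the code below: a 4-byte
   request emits something like ".long foo@dtpoff" (ASM_LONG supplies
   the directive), and an 8-byte request emits ".long foo@dtpoff, 0".  */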
14761 static void ATTRIBUTE_UNUSED
14762 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14764 fputs (ASM_LONG, file);
14765 output_addr_const (file, x);
14766 fputs ("@dtpoff", file);
14767 switch (size)
14769 case 4:
14770 break;
14771 case 8:
14772 fputs (", 0", file);
14773 break;
14774 default:
14775 gcc_unreachable ();
14779 /* Return true if X is a representation of the PIC register. This copes
14780 with calls from ix86_find_base_term, where the register might have
14781 been replaced by a cselib value. */
14783 static bool
14784 ix86_pic_register_p (rtx x)
14786 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14787 return (pic_offset_table_rtx
14788 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14789 else if (!REG_P (x))
14790 return false;
14791 else if (pic_offset_table_rtx)
14793 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14794 return true;
14795 if (HARD_REGISTER_P (x)
14796 && !HARD_REGISTER_P (pic_offset_table_rtx)
14797 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14798 return true;
14799 return false;
14801 else
14802 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14805 /* Helper function for ix86_delegitimize_address.
14806 Attempt to delegitimize TLS local-exec accesses. */
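/* An illustrative sketch, not from the original comment: a local-exec
   access such as %fs:foo@tpoff, i.e. a segment-based address whose
   displacement wraps (unspec [(symbol_ref "foo")] UNSPEC_NTPOFF), is
   turned back into a reference to "foo", with any base or scaled index
   terms re-applied around it.  */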
14808 static rtx
14809 ix86_delegitimize_tls_address (rtx orig_x)
14811 rtx x = orig_x, unspec;
14812 struct ix86_address addr;
14814 if (!TARGET_TLS_DIRECT_SEG_REFS)
14815 return orig_x;
14816 if (MEM_P (x))
14817 x = XEXP (x, 0);
14818 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14819 return orig_x;
14820 if (ix86_decompose_address (x, &addr) == 0
14821 || addr.seg != DEFAULT_TLS_SEG_REG
14822 || addr.disp == NULL_RTX
14823 || GET_CODE (addr.disp) != CONST)
14824 return orig_x;
14825 unspec = XEXP (addr.disp, 0);
14826 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14827 unspec = XEXP (unspec, 0);
14828 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14829 return orig_x;
14830 x = XVECEXP (unspec, 0, 0);
14831 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14832 if (unspec != XEXP (addr.disp, 0))
14833 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14834 if (addr.index)
14836 rtx idx = addr.index;
14837 if (addr.scale != 1)
14838 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14839 x = gen_rtx_PLUS (Pmode, idx, x);
14841 if (addr.base)
14842 x = gen_rtx_PLUS (Pmode, addr.base, x);
14843 if (MEM_P (orig_x))
14844 x = replace_equiv_address_nv (orig_x, x);
14845 return x;
14848 /* In the name of slightly smaller debug output, and to cater to
14849 general assembler lossage, recognize PIC+GOTOFF and turn it back
14850 into a direct symbol reference.
14852 On Darwin, this is necessary to avoid a crash, because Darwin
14853 has a different PIC label for each routine but the DWARF debugging
14854 information is not associated with any particular routine, so it's
14855 necessary to remove references to the PIC label from RTL stored by
14856 the DWARF output code. */
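/* An illustrative sketch, not part of the original comment: an address
   like (plus (reg ebx) (const (unspec [(symbol_ref "foo")]
   UNSPEC_GOTOFF))) is turned back into a plain reference to "foo", and
   any extra register or constant addend found next to the GOTOFF term
   is re-applied to the recovered symbol by the code below.  */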
14858 static rtx
14859 ix86_delegitimize_address (rtx x)
14861 rtx orig_x = delegitimize_mem_from_attrs (x);
14862 /* addend is NULL or some rtx if x is something+GOTOFF where
14863 something doesn't include the PIC register. */
14864 rtx addend = NULL_RTX;
14865 /* reg_addend is NULL or a multiple of some register. */
14866 rtx reg_addend = NULL_RTX;
14867 /* const_addend is NULL or a const_int. */
14868 rtx const_addend = NULL_RTX;
14869 /* This is the result, or NULL. */
14870 rtx result = NULL_RTX;
14872 x = orig_x;
14874 if (MEM_P (x))
14875 x = XEXP (x, 0);
14877 if (TARGET_64BIT)
14879 if (GET_CODE (x) == CONST
14880 && GET_CODE (XEXP (x, 0)) == PLUS
14881 && GET_MODE (XEXP (x, 0)) == Pmode
14882 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14883 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14884 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14886 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14887 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14888 if (MEM_P (orig_x))
14889 x = replace_equiv_address_nv (orig_x, x);
14890 return x;
14893 if (GET_CODE (x) == CONST
14894 && GET_CODE (XEXP (x, 0)) == UNSPEC
14895 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14896 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14897 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14899 x = XVECEXP (XEXP (x, 0), 0, 0);
14900 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14902 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14903 GET_MODE (x), 0);
14904 if (x == NULL_RTX)
14905 return orig_x;
14907 return x;
14910 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14911 return ix86_delegitimize_tls_address (orig_x);
14913 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14914 and -mcmodel=medium -fpic. */
14917 if (GET_CODE (x) != PLUS
14918 || GET_CODE (XEXP (x, 1)) != CONST)
14919 return ix86_delegitimize_tls_address (orig_x);
14921 if (ix86_pic_register_p (XEXP (x, 0)))
14922 /* %ebx + GOT/GOTOFF */
14924 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14926 /* %ebx + %reg * scale + GOT/GOTOFF */
14927 reg_addend = XEXP (x, 0);
14928 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14929 reg_addend = XEXP (reg_addend, 1);
14930 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14931 reg_addend = XEXP (reg_addend, 0);
14932 else
14934 reg_addend = NULL_RTX;
14935 addend = XEXP (x, 0);
14938 else
14939 addend = XEXP (x, 0);
14941 x = XEXP (XEXP (x, 1), 0);
14942 if (GET_CODE (x) == PLUS
14943 && CONST_INT_P (XEXP (x, 1)))
14945 const_addend = XEXP (x, 1);
14946 x = XEXP (x, 0);
14949 if (GET_CODE (x) == UNSPEC
14950 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14951 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14952 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14953 && !MEM_P (orig_x) && !addend)))
14954 result = XVECEXP (x, 0, 0);
14956 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14957 && !MEM_P (orig_x))
14958 result = XVECEXP (x, 0, 0);
14960 if (! result)
14961 return ix86_delegitimize_tls_address (orig_x);
14963 if (const_addend)
14964 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14965 if (reg_addend)
14966 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14967 if (addend)
14969 /* If the rest of original X doesn't involve the PIC register, add
14970 addend and subtract pic_offset_table_rtx. This can happen e.g.
14971 for code like:
14972 leal (%ebx, %ecx, 4), %ecx
14974 movl foo@GOTOFF(%ecx), %edx
14975 in which case we return (%ecx - %ebx) + foo
14976 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14977 and reload has completed. */
14978 if (pic_offset_table_rtx
14979 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14980 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14981 pic_offset_table_rtx),
14982 result);
14983 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14985 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14986 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14987 result = gen_rtx_PLUS (Pmode, tmp, result);
14989 else
14990 return orig_x;
14992 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14994 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14995 if (result == NULL_RTX)
14996 return orig_x;
14998 return result;
15001 /* If X is a machine specific address (i.e. a symbol or label being
15002 referenced as a displacement from the GOT implemented using an
15003 UNSPEC), then return the base term. Otherwise return X. */
15006 ix86_find_base_term (rtx x)
15008 rtx term;
15010 if (TARGET_64BIT)
15012 if (GET_CODE (x) != CONST)
15013 return x;
15014 term = XEXP (x, 0);
15015 if (GET_CODE (term) == PLUS
15016 && (CONST_INT_P (XEXP (term, 1))
15017 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
15018 term = XEXP (term, 0);
15019 if (GET_CODE (term) != UNSPEC
15020 || (XINT (term, 1) != UNSPEC_GOTPCREL
15021 && XINT (term, 1) != UNSPEC_PCREL))
15022 return x;
15024 return XVECEXP (term, 0, 0);
15027 return ix86_delegitimize_address (x);
15030 static void
15031 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
15032 bool fp, FILE *file)
15034 const char *suffix;
15036 if (mode == CCFPmode || mode == CCFPUmode)
15038 code = ix86_fp_compare_code_to_integer (code);
15039 mode = CCmode;
15041 if (reverse)
15042 code = reverse_condition (code);
15044 switch (code)
15046 case EQ:
15047 switch (mode)
15049 case CCAmode:
15050 suffix = "a";
15051 break;
15053 case CCCmode:
15054 suffix = "c";
15055 break;
15057 case CCOmode:
15058 suffix = "o";
15059 break;
15061 case CCSmode:
15062 suffix = "s";
15063 break;
15065 default:
15066 suffix = "e";
15068 break;
15069 case NE:
15070 switch (mode)
15072 case CCAmode:
15073 suffix = "na";
15074 break;
15076 case CCCmode:
15077 suffix = "nc";
15078 break;
15080 case CCOmode:
15081 suffix = "no";
15082 break;
15084 case CCSmode:
15085 suffix = "ns";
15086 break;
15088 default:
15089 suffix = "ne";
15091 break;
15092 case GT:
15093 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
15094 suffix = "g";
15095 break;
15096 case GTU:
15097 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
15098 Those same assemblers have the same but opposite lossage on cmov. */
15099 if (mode == CCmode)
15100 suffix = fp ? "nbe" : "a";
15101 else
15102 gcc_unreachable ();
15103 break;
15104 case LT:
15105 switch (mode)
15107 case CCNOmode:
15108 case CCGOCmode:
15109 suffix = "s";
15110 break;
15112 case CCmode:
15113 case CCGCmode:
15114 suffix = "l";
15115 break;
15117 default:
15118 gcc_unreachable ();
15120 break;
15121 case LTU:
15122 if (mode == CCmode)
15123 suffix = "b";
15124 else if (mode == CCCmode)
15125 suffix = fp ? "b" : "c";
15126 else
15127 gcc_unreachable ();
15128 break;
15129 case GE:
15130 switch (mode)
15132 case CCNOmode:
15133 case CCGOCmode:
15134 suffix = "ns";
15135 break;
15137 case CCmode:
15138 case CCGCmode:
15139 suffix = "ge";
15140 break;
15142 default:
15143 gcc_unreachable ();
15145 break;
15146 case GEU:
15147 if (mode == CCmode)
15148 suffix = "nb";
15149 else if (mode == CCCmode)
15150 suffix = fp ? "nb" : "nc";
15151 else
15152 gcc_unreachable ();
15153 break;
15154 case LE:
15155 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15156 suffix = "le";
15157 break;
15158 case LEU:
15159 if (mode == CCmode)
15160 suffix = "be";
15161 else
15162 gcc_unreachable ();
15163 break;
15164 case UNORDERED:
15165 suffix = fp ? "u" : "p";
15166 break;
15167 case ORDERED:
15168 suffix = fp ? "nu" : "np";
15169 break;
15170 default:
15171 gcc_unreachable ();
15173 fputs (suffix, file);
15176 /* Print the name of register X to FILE based on its machine mode and number.
15177 If CODE is 'w', pretend the mode is HImode.
15178 If CODE is 'b', pretend the mode is QImode.
15179 If CODE is 'k', pretend the mode is SImode.
15180 If CODE is 'q', pretend the mode is DImode.
15181 If CODE is 'x', pretend the mode is V4SFmode.
15182 If CODE is 't', pretend the mode is V8SFmode.
15183 If CODE is 'g', pretend the mode is V16SFmode.
15184 If CODE is 'h', pretend the reg is the 'high' byte register.
15185 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
15186 If CODE is 'd', duplicate the operand for AVX instruction.
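   As an illustrative example, not part of the original list: with CODE
   'q' and the AX hard register this prints "rax" (with a '%' prefix in
   AT&T syntax), with CODE 'b' it prints "al", and the extended
   registers r8-r15 get the "r%d[bwd]" style names handled below.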
15189 void
15190 print_reg (rtx x, int code, FILE *file)
15192 const char *reg;
15193 unsigned int regno;
15194 bool duplicated = code == 'd' && TARGET_AVX;
15196 if (ASSEMBLER_DIALECT == ASM_ATT)
15197 putc ('%', file);
15199 if (x == pc_rtx)
15201 gcc_assert (TARGET_64BIT);
15202 fputs ("rip", file);
15203 return;
15206 regno = true_regnum (x);
15207 gcc_assert (regno != ARG_POINTER_REGNUM
15208 && regno != FRAME_POINTER_REGNUM
15209 && regno != FLAGS_REG
15210 && regno != FPSR_REG
15211 && regno != FPCR_REG);
15213 if (code == 'w' || MMX_REG_P (x))
15214 code = 2;
15215 else if (code == 'b')
15216 code = 1;
15217 else if (code == 'k')
15218 code = 4;
15219 else if (code == 'q')
15220 code = 8;
15221 else if (code == 'y')
15222 code = 3;
15223 else if (code == 'h')
15224 code = 0;
15225 else if (code == 'x')
15226 code = 16;
15227 else if (code == 't')
15228 code = 32;
15229 else if (code == 'g')
15230 code = 64;
15231 else
15232 code = GET_MODE_SIZE (GET_MODE (x));
15234 /* Irritatingly, AMD extended registers use a different naming convention
15235 from the normal registers: "r%d[bwd]" */
15236 if (REX_INT_REGNO_P (regno))
15238 gcc_assert (TARGET_64BIT);
15239 putc ('r', file);
15240 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
15241 switch (code)
15243 case 0:
15244 error ("extended registers have no high halves");
15245 break;
15246 case 1:
15247 putc ('b', file);
15248 break;
15249 case 2:
15250 putc ('w', file);
15251 break;
15252 case 4:
15253 putc ('d', file);
15254 break;
15255 case 8:
15256 /* no suffix */
15257 break;
15258 default:
15259 error ("unsupported operand size for extended register");
15260 break;
15262 return;
15265 reg = NULL;
15266 switch (code)
15268 case 3:
15269 if (STACK_TOP_P (x))
15271 reg = "st(0)";
15272 break;
15274 /* FALLTHRU */
15275 case 8:
15276 case 4:
15277 case 12:
15278 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x) && ! ANY_BND_REG_P (x))
15279 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
15280 /* FALLTHRU */
15281 case 16:
15282 case 2:
15283 normal:
15284 reg = hi_reg_name[regno];
15285 break;
15286 case 1:
15287 if (regno >= ARRAY_SIZE (qi_reg_name))
15288 goto normal;
15289 reg = qi_reg_name[regno];
15290 break;
15291 case 0:
15292 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15293 goto normal;
15294 reg = qi_high_reg_name[regno];
15295 break;
15296 case 32:
15297 if (SSE_REG_P (x))
15299 gcc_assert (!duplicated);
15300 putc ('y', file);
15301 fputs (hi_reg_name[regno] + 1, file);
15302 return;
15304 case 64:
15305 if (SSE_REG_P (x))
15307 gcc_assert (!duplicated);
15308 putc ('z', file);
15309 fputs (hi_reg_name[REGNO (x)] + 1, file);
15310 return;
15312 break;
15313 default:
15314 gcc_unreachable ();
15317 fputs (reg, file);
15318 if (duplicated)
15320 if (ASSEMBLER_DIALECT == ASM_ATT)
15321 fprintf (file, ", %%%s", reg);
15322 else
15323 fprintf (file, ", %s", reg);
15327 /* Meaning of CODE:
15328 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15329 C -- print opcode suffix for set/cmov insn.
15330 c -- like C, but print reversed condition
15331 F,f -- likewise, but for floating-point.
15332 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15333 otherwise nothing
15334 R -- print embedded rounding and sae.
15335 r -- print only sae.
15336 z -- print the opcode suffix for the size of the current operand.
15337 Z -- likewise, with special suffixes for x87 instructions.
15338 * -- print a star (in certain assembler syntax)
15339 A -- print an absolute memory reference.
15340 E -- print address with DImode register names if TARGET_64BIT.
15341 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15342 s -- print a shift double count, followed by the assembler's argument
15343 delimiter.
15344 b -- print the QImode name of the register for the indicated operand.
15345 %b0 would print %al if operands[0] is reg 0.
15346 w -- likewise, print the HImode name of the register.
15347 k -- likewise, print the SImode name of the register.
15348 q -- likewise, print the DImode name of the register.
15349 x -- likewise, print the V4SFmode name of the register.
15350 t -- likewise, print the V8SFmode name of the register.
15351 g -- likewise, print the V16SFmode name of the register.
15352 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15353 y -- print "st(0)" instead of "st" as a register.
15354 d -- print duplicated register operand for AVX instruction.
15355 D -- print condition for SSE cmp instruction.
15356 P -- if PIC, print an @PLT suffix.
15357 p -- print raw symbol name.
15358 X -- don't print any sort of PIC '@' suffix for a symbol.
15359 & -- print some in-use local-dynamic symbol name.
15360 H -- print a memory address offset by 8; used for sse high-parts
15361 Y -- print condition for XOP pcom* instruction.
15362 + -- print a branch hint as 'cs' or 'ds' prefix
15363 ; -- print a semicolon (after prefixes due to bug in older gas).
15364 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15365 @ -- print a segment register of thread base pointer load
15366 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15367 ! -- print MPX prefix for jxx/call/ret instructions if required.
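   An illustrative example, not part of the original list: in a template
   such as "mov%z0\t{%1, %0|%0, %1}", '%z0' expands to the size suffix
   of operands[0] ('b', 'w', 'l' or 'q' in AT&T syntax, nothing for
   Intel syntax), and '%k1' prints the SImode name of the register in
   operands[1].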
15370 void
15371 ix86_print_operand (FILE *file, rtx x, int code)
15373 if (code)
15375 switch (code)
15377 case 'A':
15378 switch (ASSEMBLER_DIALECT)
15380 case ASM_ATT:
15381 putc ('*', file);
15382 break;
15384 case ASM_INTEL:
15385 /* Intel syntax. For absolute addresses, registers should not
15386 be surrounded by brackets. */
15387 if (!REG_P (x))
15389 putc ('[', file);
15390 ix86_print_operand (file, x, 0);
15391 putc (']', file);
15392 return;
15394 break;
15396 default:
15397 gcc_unreachable ();
15400 ix86_print_operand (file, x, 0);
15401 return;
15403 case 'E':
15404 /* Wrap address in an UNSPEC to declare special handling. */
15405 if (TARGET_64BIT)
15406 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15408 output_address (x);
15409 return;
15411 case 'L':
15412 if (ASSEMBLER_DIALECT == ASM_ATT)
15413 putc ('l', file);
15414 return;
15416 case 'W':
15417 if (ASSEMBLER_DIALECT == ASM_ATT)
15418 putc ('w', file);
15419 return;
15421 case 'B':
15422 if (ASSEMBLER_DIALECT == ASM_ATT)
15423 putc ('b', file);
15424 return;
15426 case 'Q':
15427 if (ASSEMBLER_DIALECT == ASM_ATT)
15428 putc ('l', file);
15429 return;
15431 case 'S':
15432 if (ASSEMBLER_DIALECT == ASM_ATT)
15433 putc ('s', file);
15434 return;
15436 case 'T':
15437 if (ASSEMBLER_DIALECT == ASM_ATT)
15438 putc ('t', file);
15439 return;
15441 case 'O':
15442 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15443 if (ASSEMBLER_DIALECT != ASM_ATT)
15444 return;
15446 switch (GET_MODE_SIZE (GET_MODE (x)))
15448 case 2:
15449 putc ('w', file);
15450 break;
15452 case 4:
15453 putc ('l', file);
15454 break;
15456 case 8:
15457 putc ('q', file);
15458 break;
15460 default:
15461 output_operand_lossage
15462 ("invalid operand size for operand code 'O'");
15463 return;
15466 putc ('.', file);
15467 #endif
15468 return;
15470 case 'z':
15471 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15473 /* Opcodes don't get size suffixes if using Intel syntax. */
15474 if (ASSEMBLER_DIALECT == ASM_INTEL)
15475 return;
15477 switch (GET_MODE_SIZE (GET_MODE (x)))
15479 case 1:
15480 putc ('b', file);
15481 return;
15483 case 2:
15484 putc ('w', file);
15485 return;
15487 case 4:
15488 putc ('l', file);
15489 return;
15491 case 8:
15492 putc ('q', file);
15493 return;
15495 default:
15496 output_operand_lossage
15497 ("invalid operand size for operand code 'z'");
15498 return;
15502 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15503 warning
15504 (0, "non-integer operand used with operand code 'z'");
15505 /* FALLTHRU */
15507 case 'Z':
15508 /* 387 opcodes don't get size suffixes if using Intel syntax. */
15509 if (ASSEMBLER_DIALECT == ASM_INTEL)
15510 return;
15512 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15514 switch (GET_MODE_SIZE (GET_MODE (x)))
15516 case 2:
15517 #ifdef HAVE_AS_IX86_FILDS
15518 putc ('s', file);
15519 #endif
15520 return;
15522 case 4:
15523 putc ('l', file);
15524 return;
15526 case 8:
15527 #ifdef HAVE_AS_IX86_FILDQ
15528 putc ('q', file);
15529 #else
15530 fputs ("ll", file);
15531 #endif
15532 return;
15534 default:
15535 break;
15538 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15540 /* 387 opcodes don't get size suffixes
15541 if the operands are registers. */
15542 if (STACK_REG_P (x))
15543 return;
15545 switch (GET_MODE_SIZE (GET_MODE (x)))
15547 case 4:
15548 putc ('s', file);
15549 return;
15551 case 8:
15552 putc ('l', file);
15553 return;
15555 case 12:
15556 case 16:
15557 putc ('t', file);
15558 return;
15560 default:
15561 break;
15564 else
15566 output_operand_lossage
15567 ("invalid operand type used with operand code 'Z'");
15568 return;
15571 output_operand_lossage
15572 ("invalid operand size for operand code 'Z'");
15573 return;
15575 case 'd':
15576 case 'b':
15577 case 'w':
15578 case 'k':
15579 case 'q':
15580 case 'h':
15581 case 't':
15582 case 'g':
15583 case 'y':
15584 case 'x':
15585 case 'X':
15586 case 'P':
15587 case 'p':
15588 break;
15590 case 's':
15591 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15593 ix86_print_operand (file, x, 0);
15594 fputs (", ", file);
15596 return;
15598 case 'Y':
15599 switch (GET_CODE (x))
15601 case NE:
15602 fputs ("neq", file);
15603 break;
15604 case EQ:
15605 fputs ("eq", file);
15606 break;
15607 case GE:
15608 case GEU:
15609 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15610 break;
15611 case GT:
15612 case GTU:
15613 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15614 break;
15615 case LE:
15616 case LEU:
15617 fputs ("le", file);
15618 break;
15619 case LT:
15620 case LTU:
15621 fputs ("lt", file);
15622 break;
15623 case UNORDERED:
15624 fputs ("unord", file);
15625 break;
15626 case ORDERED:
15627 fputs ("ord", file);
15628 break;
15629 case UNEQ:
15630 fputs ("ueq", file);
15631 break;
15632 case UNGE:
15633 fputs ("nlt", file);
15634 break;
15635 case UNGT:
15636 fputs ("nle", file);
15637 break;
15638 case UNLE:
15639 fputs ("ule", file);
15640 break;
15641 case UNLT:
15642 fputs ("ult", file);
15643 break;
15644 case LTGT:
15645 fputs ("une", file);
15646 break;
15647 default:
15648 output_operand_lossage ("operand is not a condition code, "
15649 "invalid operand code 'Y'");
15650 return;
15652 return;
15654 case 'D':
15655 /* Little bit of braindamage here. The SSE compare instructions
15656 use completely different names for the comparisons than the
15657 fp conditional moves do. */
15658 switch (GET_CODE (x))
15660 case UNEQ:
15661 if (TARGET_AVX)
15663 fputs ("eq_us", file);
15664 break;
15666 case EQ:
15667 fputs ("eq", file);
15668 break;
15669 case UNLT:
15670 if (TARGET_AVX)
15672 fputs ("nge", file);
15673 break;
15675 case LT:
15676 fputs ("lt", file);
15677 break;
15678 case UNLE:
15679 if (TARGET_AVX)
15681 fputs ("ngt", file);
15682 break;
15684 case LE:
15685 fputs ("le", file);
15686 break;
15687 case UNORDERED:
15688 fputs ("unord", file);
15689 break;
15690 case LTGT:
15691 if (TARGET_AVX)
15693 fputs ("neq_oq", file);
15694 break;
15696 case NE:
15697 fputs ("neq", file);
15698 break;
15699 case GE:
15700 if (TARGET_AVX)
15702 fputs ("ge", file);
15703 break;
15705 case UNGE:
15706 fputs ("nlt", file);
15707 break;
15708 case GT:
15709 if (TARGET_AVX)
15711 fputs ("gt", file);
15712 break;
15714 case UNGT:
15715 fputs ("nle", file);
15716 break;
15717 case ORDERED:
15718 fputs ("ord", file);
15719 break;
15720 default:
15721 output_operand_lossage ("operand is not a condition code, "
15722 "invalid operand code 'D'");
15723 return;
15725 return;
15727 case 'F':
15728 case 'f':
15729 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15730 if (ASSEMBLER_DIALECT == ASM_ATT)
15731 putc ('.', file);
15732 #endif
15734 case 'C':
15735 case 'c':
15736 if (!COMPARISON_P (x))
15738 output_operand_lossage ("operand is not a condition code, "
15739 "invalid operand code '%c'", code);
15740 return;
15742 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15743 code == 'c' || code == 'f',
15744 code == 'F' || code == 'f',
15745 file);
15746 return;
15748 case 'H':
15749 if (!offsettable_memref_p (x))
15751 output_operand_lossage ("operand is not an offsettable memory "
15752 "reference, invalid operand code 'H'");
15753 return;
15755 /* It doesn't actually matter what mode we use here, as we're
15756 only going to use this for printing. */
15757 x = adjust_address_nv (x, DImode, 8);
15758 /* Output 'qword ptr' for the Intel assembler dialect. */
15759 if (ASSEMBLER_DIALECT == ASM_INTEL)
15760 code = 'q';
15761 break;
15763 case 'K':
15764 gcc_assert (CONST_INT_P (x));
15766 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15767 #ifdef HAVE_AS_IX86_HLE
15768 fputs ("xacquire ", file);
15769 #else
15770 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15771 #endif
15772 else if (INTVAL (x) & IX86_HLE_RELEASE)
15773 #ifdef HAVE_AS_IX86_HLE
15774 fputs ("xrelease ", file);
15775 #else
15776 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15777 #endif
15778 /* We do not want to print the value of the operand. */
15779 return;
15781 case 'N':
15782 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15783 fputs ("{z}", file);
15784 return;
15786 case 'r':
15787 gcc_assert (CONST_INT_P (x));
15788 gcc_assert (INTVAL (x) == ROUND_SAE);
15790 if (ASSEMBLER_DIALECT == ASM_INTEL)
15791 fputs (", ", file);
15793 fputs ("{sae}", file);
15795 if (ASSEMBLER_DIALECT == ASM_ATT)
15796 fputs (", ", file);
15798 return;
15800 case 'R':
15801 gcc_assert (CONST_INT_P (x));
15803 if (ASSEMBLER_DIALECT == ASM_INTEL)
15804 fputs (", ", file);
15806 switch (INTVAL (x))
15808 case ROUND_NEAREST_INT | ROUND_SAE:
15809 fputs ("{rn-sae}", file);
15810 break;
15811 case ROUND_NEG_INF | ROUND_SAE:
15812 fputs ("{rd-sae}", file);
15813 break;
15814 case ROUND_POS_INF | ROUND_SAE:
15815 fputs ("{ru-sae}", file);
15816 break;
15817 case ROUND_ZERO | ROUND_SAE:
15818 fputs ("{rz-sae}", file);
15819 break;
15820 default:
15821 gcc_unreachable ();
15824 if (ASSEMBLER_DIALECT == ASM_ATT)
15825 fputs (", ", file);
15827 return;
15829 case '*':
15830 if (ASSEMBLER_DIALECT == ASM_ATT)
15831 putc ('*', file);
15832 return;
15834 case '&':
15836 const char *name = get_some_local_dynamic_name ();
15837 if (name == NULL)
15838 output_operand_lossage ("'%%&' used without any "
15839 "local dynamic TLS references");
15840 else
15841 assemble_name (file, name);
15842 return;
15845 case '+':
15847 rtx x;
15849 if (!optimize
15850 || optimize_function_for_size_p (cfun)
15851 || !TARGET_BRANCH_PREDICTION_HINTS)
15852 return;
15854 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15855 if (x)
15857 int pred_val = XINT (x, 0);
15859 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15860 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15862 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15863 bool cputaken
15864 = final_forward_branch_p (current_output_insn) == 0;
15866 /* Emit hints only when the default branch prediction
15867 heuristics would fail. */
15868 if (taken != cputaken)
15870 /* We use 3e (DS) prefix for taken branches and
15871 2e (CS) prefix for not taken branches. */
15872 if (taken)
15873 fputs ("ds ; ", file);
15874 else
15875 fputs ("cs ; ", file);
15879 return;
15882 case ';':
15883 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15884 putc (';', file);
15885 #endif
15886 return;
15888 case '@':
15889 if (ASSEMBLER_DIALECT == ASM_ATT)
15890 putc ('%', file);
15892 /* The kernel uses a different segment register for performance
15893 reasons; a system call would not have to trash the userspace
15894 segment register, which would be expensive. */
15895 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15896 fputs ("fs", file);
15897 else
15898 fputs ("gs", file);
15899 return;
15901 case '~':
15902 putc (TARGET_AVX2 ? 'i' : 'f', file);
15903 return;
15905 case '^':
15906 if (TARGET_64BIT && Pmode != word_mode)
15907 fputs ("addr32 ", file);
15908 return;
15910 case '!':
15911 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15912 fputs ("bnd ", file);
15913 return;
15915 default:
15916 output_operand_lossage ("invalid operand code '%c'", code);
15920 if (REG_P (x))
15921 print_reg (x, code, file);
15923 else if (MEM_P (x))
15925 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15926 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15927 && GET_MODE (x) != BLKmode)
15929 const char * size;
15930 switch (GET_MODE_SIZE (GET_MODE (x)))
15932 case 1: size = "BYTE"; break;
15933 case 2: size = "WORD"; break;
15934 case 4: size = "DWORD"; break;
15935 case 8: size = "QWORD"; break;
15936 case 12: size = "TBYTE"; break;
15937 case 16:
15938 if (GET_MODE (x) == XFmode)
15939 size = "TBYTE";
15940 else
15941 size = "XMMWORD";
15942 break;
15943 case 32: size = "YMMWORD"; break;
15944 case 64: size = "ZMMWORD"; break;
15945 default:
15946 gcc_unreachable ();
15949 /* Check for explicit size override (codes 'b', 'w', 'k',
15950 'q' and 'x') */
15951 if (code == 'b')
15952 size = "BYTE";
15953 else if (code == 'w')
15954 size = "WORD";
15955 else if (code == 'k')
15956 size = "DWORD";
15957 else if (code == 'q')
15958 size = "QWORD";
15959 else if (code == 'x')
15960 size = "XMMWORD";
15962 fputs (size, file);
15963 fputs (" PTR ", file);
15966 x = XEXP (x, 0);
15967 /* Avoid (%rip) for call operands. */
15968 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15969 && !CONST_INT_P (x))
15970 output_addr_const (file, x);
15971 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15972 output_operand_lossage ("invalid constraints for operand");
15973 else
15974 output_address (x);
15977 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15979 REAL_VALUE_TYPE r;
15980 long l;
15982 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15983 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15985 if (ASSEMBLER_DIALECT == ASM_ATT)
15986 putc ('$', file);
15987 /* Sign-extend the 32-bit SFmode immediate to 8 bytes. */
15988 if (code == 'q')
15989 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15990 (unsigned long long) (int) l);
15991 else
15992 fprintf (file, "0x%08x", (unsigned int) l);
15995 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15997 REAL_VALUE_TYPE r;
15998 long l[2];
16000 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16001 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16003 if (ASSEMBLER_DIALECT == ASM_ATT)
16004 putc ('$', file);
16005 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
16008 /* These float cases don't actually occur as immediate operands. */
16009 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
16011 char dstr[30];
16013 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
16014 fputs (dstr, file);
16017 else
16019 /* We have patterns that allow zero sets of memory, for instance.
16020 In 64-bit mode, we should probably support all 8-byte vectors,
16021 since we can in fact encode that into an immediate. */
16022 if (GET_CODE (x) == CONST_VECTOR)
16024 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
16025 x = const0_rtx;
16028 if (code != 'P' && code != 'p')
16030 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
16032 if (ASSEMBLER_DIALECT == ASM_ATT)
16033 putc ('$', file);
16035 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
16036 || GET_CODE (x) == LABEL_REF)
16038 if (ASSEMBLER_DIALECT == ASM_ATT)
16039 putc ('$', file);
16040 else
16041 fputs ("OFFSET FLAT:", file);
16044 if (CONST_INT_P (x))
16045 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16046 else if (flag_pic || MACHOPIC_INDIRECT)
16047 output_pic_addr_const (file, x, code);
16048 else
16049 output_addr_const (file, x);
16053 static bool
16054 ix86_print_operand_punct_valid_p (unsigned char code)
16056 return (code == '@' || code == '*' || code == '+' || code == '&'
16057 || code == ';' || code == '~' || code == '^' || code == '!');
16060 /* Print a memory operand whose address is ADDR. */
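/* An illustrative note, inferred from the code below: the AT&T branch
   prints addresses as "disp(%base,%index,scale)", e.g. "8(%rax,%rbx,4)",
   while the Intel branch prints "[base+disp+index*scale]", e.g.
   "[rax+8+rbx*4]".  */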
16062 static void
16063 ix86_print_operand_address (FILE *file, rtx addr)
16065 struct ix86_address parts;
16066 rtx base, index, disp;
16067 int scale;
16068 int ok;
16069 bool vsib = false;
16070 int code = 0;
16072 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
16074 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16075 gcc_assert (parts.index == NULL_RTX);
16076 parts.index = XVECEXP (addr, 0, 1);
16077 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
16078 addr = XVECEXP (addr, 0, 0);
16079 vsib = true;
16081 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
16083 gcc_assert (TARGET_64BIT);
16084 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16085 code = 'q';
16087 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
16089 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
16090 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
16091 if (parts.base != NULL_RTX)
16093 parts.index = parts.base;
16094 parts.scale = 1;
16096 parts.base = XVECEXP (addr, 0, 0);
16097 addr = XVECEXP (addr, 0, 0);
16099 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
16101 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16102 gcc_assert (parts.index == NULL_RTX);
16103 parts.index = XVECEXP (addr, 0, 1);
16104 addr = XVECEXP (addr, 0, 0);
16106 else
16107 ok = ix86_decompose_address (addr, &parts);
16109 gcc_assert (ok);
16111 base = parts.base;
16112 index = parts.index;
16113 disp = parts.disp;
16114 scale = parts.scale;
16116 switch (parts.seg)
16118 case SEG_DEFAULT:
16119 break;
16120 case SEG_FS:
16121 case SEG_GS:
16122 if (ASSEMBLER_DIALECT == ASM_ATT)
16123 putc ('%', file);
16124 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16125 break;
16126 default:
16127 gcc_unreachable ();
16130 /* Use the one byte shorter RIP relative addressing for 64-bit mode. */
16131 if (TARGET_64BIT && !base && !index)
16133 rtx symbol = disp;
16135 if (GET_CODE (disp) == CONST
16136 && GET_CODE (XEXP (disp, 0)) == PLUS
16137 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16138 symbol = XEXP (XEXP (disp, 0), 0);
16140 if (GET_CODE (symbol) == LABEL_REF
16141 || (GET_CODE (symbol) == SYMBOL_REF
16142 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16143 base = pc_rtx;
16145 if (!base && !index)
16147 /* Displacement only requires special attention. */
16149 if (CONST_INT_P (disp))
16151 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16152 fputs ("ds:", file);
16153 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16155 else if (flag_pic)
16156 output_pic_addr_const (file, disp, 0);
16157 else
16158 output_addr_const (file, disp);
16160 else
16162 /* Print SImode register names to force addr32 prefix. */
16163 if (SImode_address_operand (addr, VOIDmode))
16165 #ifdef ENABLE_CHECKING
16166 gcc_assert (TARGET_64BIT);
16167 switch (GET_CODE (addr))
16169 case SUBREG:
16170 gcc_assert (GET_MODE (addr) == SImode);
16171 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16172 break;
16173 case ZERO_EXTEND:
16174 case AND:
16175 gcc_assert (GET_MODE (addr) == DImode);
16176 break;
16177 default:
16178 gcc_unreachable ();
16180 #endif
16181 gcc_assert (!code);
16182 code = 'k';
16184 else if (code == 0
16185 && TARGET_X32
16186 && disp
16187 && CONST_INT_P (disp)
16188 && INTVAL (disp) < -16*1024*1024)
16190 /* X32 runs in 64-bit mode, where displacement, DISP, in
16191 address DISP(%r64), is encoded as 32-bit immediate sign-
16192 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16193 address is %r64 + 0xffffffffbffffd00. When %r64 <
16194 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16195 which is invalid for x32. The correct address is %r64
16196 - 0x40000300 == 0xf7ffdd64. To properly encode
16197 -0x40000300(%r64) for x32, we zero-extend negative
16198 displacement by forcing addr32 prefix which truncates
16199 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16200 zero-extend all negative displacements, including -1(%rsp).
16201 However, for small negative displacements, sign-extension
16202 won't cause overflow. We only zero-extend negative
16203 displacements if they are < -16*1024*1024, which is also used
16204 to check legitimate address displacements for PIC. */
16205 code = 'k';
16208 if (ASSEMBLER_DIALECT == ASM_ATT)
16210 if (disp)
16212 if (flag_pic)
16213 output_pic_addr_const (file, disp, 0);
16214 else if (GET_CODE (disp) == LABEL_REF)
16215 output_asm_label (disp);
16216 else
16217 output_addr_const (file, disp);
16220 putc ('(', file);
16221 if (base)
16222 print_reg (base, code, file);
16223 if (index)
16225 putc (',', file);
16226 print_reg (index, vsib ? 0 : code, file);
16227 if (scale != 1 || vsib)
16228 fprintf (file, ",%d", scale);
16230 putc (')', file);
16232 else
16234 rtx offset = NULL_RTX;
16236 if (disp)
16238 /* Pull out the offset of a symbol; print any symbol itself. */
16239 if (GET_CODE (disp) == CONST
16240 && GET_CODE (XEXP (disp, 0)) == PLUS
16241 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16243 offset = XEXP (XEXP (disp, 0), 1);
16244 disp = gen_rtx_CONST (VOIDmode,
16245 XEXP (XEXP (disp, 0), 0));
16248 if (flag_pic)
16249 output_pic_addr_const (file, disp, 0);
16250 else if (GET_CODE (disp) == LABEL_REF)
16251 output_asm_label (disp);
16252 else if (CONST_INT_P (disp))
16253 offset = disp;
16254 else
16255 output_addr_const (file, disp);
16258 putc ('[', file);
16259 if (base)
16261 print_reg (base, code, file);
16262 if (offset)
16264 if (INTVAL (offset) >= 0)
16265 putc ('+', file);
16266 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16269 else if (offset)
16270 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16271 else
16272 putc ('0', file);
16274 if (index)
16276 putc ('+', file);
16277 print_reg (index, vsib ? 0 : code, file);
16278 if (scale != 1 || vsib)
16279 fprintf (file, "*%d", scale);
16281 putc (']', file);
16286 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
16288 static bool
16289 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16291 rtx op;
16293 if (GET_CODE (x) != UNSPEC)
16294 return false;
16296 op = XVECEXP (x, 0, 0);
16297 switch (XINT (x, 1))
16299 case UNSPEC_GOTTPOFF:
16300 output_addr_const (file, op);
16301 /* FIXME: This might be @TPOFF in Sun ld. */
16302 fputs ("@gottpoff", file);
16303 break;
16304 case UNSPEC_TPOFF:
16305 output_addr_const (file, op);
16306 fputs ("@tpoff", file);
16307 break;
16308 case UNSPEC_NTPOFF:
16309 output_addr_const (file, op);
16310 if (TARGET_64BIT)
16311 fputs ("@tpoff", file);
16312 else
16313 fputs ("@ntpoff", file);
16314 break;
16315 case UNSPEC_DTPOFF:
16316 output_addr_const (file, op);
16317 fputs ("@dtpoff", file);
16318 break;
16319 case UNSPEC_GOTNTPOFF:
16320 output_addr_const (file, op);
16321 if (TARGET_64BIT)
16322 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16323 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16324 else
16325 fputs ("@gotntpoff", file);
16326 break;
16327 case UNSPEC_INDNTPOFF:
16328 output_addr_const (file, op);
16329 fputs ("@indntpoff", file);
16330 break;
16331 #if TARGET_MACHO
16332 case UNSPEC_MACHOPIC_OFFSET:
16333 output_addr_const (file, op);
16334 putc ('-', file);
16335 machopic_output_function_base_name (file);
16336 break;
16337 #endif
16339 case UNSPEC_STACK_CHECK:
16341 int offset;
16343 gcc_assert (flag_split_stack);
16345 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16346 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16347 #else
16348 gcc_unreachable ();
16349 #endif
16351 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16353 break;
16355 default:
16356 return false;
16359 return true;
16362 /* Split one or more double-mode RTL references into pairs of half-mode
16363 references. The RTL can be REG, offsettable MEM, integer constant, or
16364 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16365 split and "num" is its length. lo_half and hi_half are output arrays
16366 that parallel "operands". */
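/* An illustrative example, an assumption rather than part of the
   original comment: splitting a DImode pseudo on a 32-bit target yields
   lo_half = (subreg:SI (reg:DI 100) 0) and
   hi_half = (subreg:SI (reg:DI 100) 4), while an offsettable MEM is
   split with adjust_address at byte offsets 0 and 4 instead.  */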
16368 void
16369 split_double_mode (machine_mode mode, rtx operands[],
16370 int num, rtx lo_half[], rtx hi_half[])
16372 machine_mode half_mode;
16373 unsigned int byte;
16375 switch (mode)
16377 case TImode:
16378 half_mode = DImode;
16379 break;
16380 case DImode:
16381 half_mode = SImode;
16382 break;
16383 default:
16384 gcc_unreachable ();
16387 byte = GET_MODE_SIZE (half_mode);
16389 while (num--)
16391 rtx op = operands[num];
16393 /* simplify_subreg refuses to split volatile memory addresses,
16394 but we still have to handle them. */
16395 if (MEM_P (op))
16397 lo_half[num] = adjust_address (op, half_mode, 0);
16398 hi_half[num] = adjust_address (op, half_mode, byte);
16400 else
16402 lo_half[num] = simplify_gen_subreg (half_mode, op,
16403 GET_MODE (op) == VOIDmode
16404 ? mode : GET_MODE (op), 0);
16405 hi_half[num] = simplify_gen_subreg (half_mode, op,
16406 GET_MODE (op) == VOIDmode
16407 ? mode : GET_MODE (op), byte);
16412 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16413 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16414 is the expression of the binary operation. The output may either be
16415 emitted here, or returned to the caller, like all output_* functions.
16417 There is no guarantee that the operands are the same mode, as they
16418 might be within FLOAT or FLOAT_EXTEND expressions. */
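/* An illustrative example, inferred from the code below: a PLUS in
   SFmode on SSE registers returns "addss\t{%2, %0|%0, %2}" (or
   "vaddss\t{%2, %1, %0|%0, %1, %2}" with AVX), while the x87 path
   builds strings such as "fadd%Z2\t%2" when operands[2] is a memory
   operand.  */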
16420 #ifndef SYSV386_COMPAT
16421 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16422 wants to fix the assemblers because that causes incompatibility
16423 with gcc. No-one wants to fix gcc because that causes
16424 incompatibility with assemblers... You can use the option of
16425 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16426 #define SYSV386_COMPAT 1
16427 #endif
16429 const char *
16430 output_387_binary_op (rtx insn, rtx *operands)
16432 static char buf[40];
16433 const char *p;
16434 const char *ssep;
16435 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16437 #ifdef ENABLE_CHECKING
16438 /* Even if we do not want to check the inputs, this documents the input
16439 constraints, which helps in understanding the following code. */
16440 if (STACK_REG_P (operands[0])
16441 && ((REG_P (operands[1])
16442 && REGNO (operands[0]) == REGNO (operands[1])
16443 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16444 || (REG_P (operands[2])
16445 && REGNO (operands[0]) == REGNO (operands[2])
16446 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16447 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16448 ; /* ok */
16449 else
16450 gcc_assert (is_sse);
16451 #endif
16453 switch (GET_CODE (operands[3]))
16455 case PLUS:
16456 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16457 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16458 p = "fiadd";
16459 else
16460 p = "fadd";
16461 ssep = "vadd";
16462 break;
16464 case MINUS:
16465 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16466 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16467 p = "fisub";
16468 else
16469 p = "fsub";
16470 ssep = "vsub";
16471 break;
16473 case MULT:
16474 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16475 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16476 p = "fimul";
16477 else
16478 p = "fmul";
16479 ssep = "vmul";
16480 break;
16482 case DIV:
16483 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16484 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16485 p = "fidiv";
16486 else
16487 p = "fdiv";
16488 ssep = "vdiv";
16489 break;
16491 default:
16492 gcc_unreachable ();
16495 if (is_sse)
16497 if (TARGET_AVX)
16499 strcpy (buf, ssep);
16500 if (GET_MODE (operands[0]) == SFmode)
16501 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16502 else
16503 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16505 else
16507 strcpy (buf, ssep + 1);
16508 if (GET_MODE (operands[0]) == SFmode)
16509 strcat (buf, "ss\t{%2, %0|%0, %2}");
16510 else
16511 strcat (buf, "sd\t{%2, %0|%0, %2}");
16513 return buf;
16515 strcpy (buf, p);
16517 switch (GET_CODE (operands[3]))
16519 case MULT:
16520 case PLUS:
16521 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16522 std::swap (operands[1], operands[2]);
16524 /* We now know operands[0] == operands[1]. */
16526 if (MEM_P (operands[2]))
16528 p = "%Z2\t%2";
16529 break;
16532 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16534 if (STACK_TOP_P (operands[0]))
16535 /* How is it that we are storing to a dead operand[2]?
16536 Well, presumably operands[1] is dead too. We can't
16537 store the result to st(0) as st(0) gets popped on this
16538 instruction. Instead store to operands[2] (which I
16539 think has to be st(1)). st(1) will be popped later.
16540 gcc <= 2.8.1 didn't have this check and generated
16541 assembly code that the Unixware assembler rejected. */
16542 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16543 else
16544 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16545 break;
16548 if (STACK_TOP_P (operands[0]))
16549 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16550 else
16551 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16552 break;
16554 case MINUS:
16555 case DIV:
16556 if (MEM_P (operands[1]))
16558 p = "r%Z1\t%1";
16559 break;
16562 if (MEM_P (operands[2]))
16564 p = "%Z2\t%2";
16565 break;
16568 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16570 #if SYSV386_COMPAT
16571 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16572 derived assemblers, confusingly reverse the direction of
16573 the operation for fsub{r} and fdiv{r} when the
16574 destination register is not st(0). The Intel assembler
16575 doesn't have this brain damage. Read !SYSV386_COMPAT to
16576 figure out what the hardware really does. */
16577 if (STACK_TOP_P (operands[0]))
16578 p = "{p\t%0, %2|rp\t%2, %0}";
16579 else
16580 p = "{rp\t%2, %0|p\t%0, %2}";
16581 #else
16582 if (STACK_TOP_P (operands[0]))
16583 /* As above for fmul/fadd, we can't store to st(0). */
16584 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16585 else
16586 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16587 #endif
16588 break;
16591 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16593 #if SYSV386_COMPAT
16594 if (STACK_TOP_P (operands[0]))
16595 p = "{rp\t%0, %1|p\t%1, %0}";
16596 else
16597 p = "{p\t%1, %0|rp\t%0, %1}";
16598 #else
16599 if (STACK_TOP_P (operands[0]))
16600 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16601 else
16602 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16603 #endif
16604 break;
16607 if (STACK_TOP_P (operands[0]))
16609 if (STACK_TOP_P (operands[1]))
16610 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16611 else
16612 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16613 break;
16615 else if (STACK_TOP_P (operands[1]))
16617 #if SYSV386_COMPAT
16618 p = "{\t%1, %0|r\t%0, %1}";
16619 #else
16620 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16621 #endif
16623 else
16625 #if SYSV386_COMPAT
16626 p = "{r\t%2, %0|\t%0, %2}";
16627 #else
16628 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16629 #endif
16631 break;
16633 default:
16634 gcc_unreachable ();
16637 strcat (buf, p);
16638 return buf;
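/* Editor's note: an illustrative, standalone sketch (not part of GCC) of how
   the TARGET_AVX branch above assembles its output template for an SFmode
   PLUS.  The "{att|intel}" alternatives inside the braces are resolved later
   by the generic asm-output machinery according to ASSEMBLER_DIALECT.  */
#if 0
#include <stdio.h>
#include <string.h>

int
main (void)
{
  static char buf[40];
  const char *ssep = "vadd";                    /* selected for PLUS above  */
  strcpy (buf, ssep);
  strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
  /* With xmm0/xmm1/xmm2 as operands 0/1/2, the AT&T form would print as
     "vaddss %xmm2, %xmm1, %xmm0".  */
  puts (buf);
  return 0;
}
#endif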
16641 /* Check if a 256bit AVX register is referenced inside of EXP. */
16643 static bool
16644 ix86_check_avx256_register (const_rtx exp)
16646 if (GET_CODE (exp) == SUBREG)
16647 exp = SUBREG_REG (exp);
16649 return (REG_P (exp)
16650 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16653 /* Return needed mode for entity in optimize_mode_switching pass. */
16655 static int
16656 ix86_avx_u128_mode_needed (rtx_insn *insn)
16658 if (CALL_P (insn))
16660 rtx link;
16662 /* Needed mode is set to AVX_U128_CLEAN if there are
16663 no 256bit modes used in function arguments. */
16664 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16665 link;
16666 link = XEXP (link, 1))
16668 if (GET_CODE (XEXP (link, 0)) == USE)
16670 rtx arg = XEXP (XEXP (link, 0), 0);
16672 if (ix86_check_avx256_register (arg))
16673 return AVX_U128_DIRTY;
16677 return AVX_U128_CLEAN;
16680 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16681 changes state only when a 256bit register is written to, but we need
16682 to prevent the compiler from moving the optimal insertion point above
16683 an eventual read from a 256bit register. */
16684 subrtx_iterator::array_type array;
16685 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16686 if (ix86_check_avx256_register (*iter))
16687 return AVX_U128_DIRTY;
16689 return AVX_U128_ANY;
16692 /* Return mode that i387 must be switched into
16693 prior to the execution of insn. */
16695 static int
16696 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16698 enum attr_i387_cw mode;
16700 /* The mode UNINITIALIZED is used to store control word after a
16701 function call or ASM pattern. The mode ANY specifies that the function
16702 has no requirements on the control word and makes no changes to the
16703 bits we are interested in. */
16705 if (CALL_P (insn)
16706 || (NONJUMP_INSN_P (insn)
16707 && (asm_noperands (PATTERN (insn)) >= 0
16708 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16709 return I387_CW_UNINITIALIZED;
16711 if (recog_memoized (insn) < 0)
16712 return I387_CW_ANY;
16714 mode = get_attr_i387_cw (insn);
16716 switch (entity)
16718 case I387_TRUNC:
16719 if (mode == I387_CW_TRUNC)
16720 return mode;
16721 break;
16723 case I387_FLOOR:
16724 if (mode == I387_CW_FLOOR)
16725 return mode;
16726 break;
16728 case I387_CEIL:
16729 if (mode == I387_CW_CEIL)
16730 return mode;
16731 break;
16733 case I387_MASK_PM:
16734 if (mode == I387_CW_MASK_PM)
16735 return mode;
16736 break;
16738 default:
16739 gcc_unreachable ();
16742 return I387_CW_ANY;
16745 /* Return mode that entity must be switched into
16746 prior to the execution of insn. */
16748 static int
16749 ix86_mode_needed (int entity, rtx_insn *insn)
16751 switch (entity)
16753 case AVX_U128:
16754 return ix86_avx_u128_mode_needed (insn);
16755 case I387_TRUNC:
16756 case I387_FLOOR:
16757 case I387_CEIL:
16758 case I387_MASK_PM:
16759 return ix86_i387_mode_needed (entity, insn);
16760 default:
16761 gcc_unreachable ();
16763 return 0;
16766 /* Check if a 256bit AVX register is referenced in stores. */
16768 static void
16769 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16771 if (ix86_check_avx256_register (dest))
16773 bool *used = (bool *) data;
16774 *used = true;
16778 /* Calculate mode of upper 128bit AVX registers after the insn. */
16780 static int
16781 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16783 rtx pat = PATTERN (insn);
16785 if (vzeroupper_operation (pat, VOIDmode)
16786 || vzeroall_operation (pat, VOIDmode))
16787 return AVX_U128_CLEAN;
16789 /* We know that the state is clean after a CALL insn if no 256bit
16790 register is used for the function return value. */
16791 if (CALL_P (insn))
16793 bool avx_reg256_found = false;
16794 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16796 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16799 /* Otherwise, return current mode. Remember that if insn
16800 references AVX 256bit registers, the mode was already changed
16801 to DIRTY from MODE_NEEDED. */
16802 return mode;
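/* Editor's note: an illustrative sketch (not part of GCC) of the AVX_U128
   transition just described, written as a tiny standalone state machine.
   The enum and function names are invented for the example only.  */
#if 0
enum u128_state { U128_CLEAN, U128_DIRTY, U128_ANY };

enum u128_state
u128_after_sketch (enum u128_state cur, int is_vzero, int is_call,
                   int ret_uses_avx256)
{
  if (is_vzero)                         /* vzeroupper / vzeroall           */
    return U128_CLEAN;
  if (is_call)                          /* clean unless a 256-bit return   */
    return ret_uses_avx256 ? U128_DIRTY : U128_CLEAN;
  return cur;                           /* MODE_NEEDED already made 256-bit
                                           uses DIRTY, so keep the mode.   */
}
#endif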
16805 /* Return the mode that an insn results in. */
16807 static int
16808 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16810 switch (entity)
16812 case AVX_U128:
16813 return ix86_avx_u128_mode_after (mode, insn);
16814 case I387_TRUNC:
16815 case I387_FLOOR:
16816 case I387_CEIL:
16817 case I387_MASK_PM:
16818 return mode;
16819 default:
16820 gcc_unreachable ();
16824 static int
16825 ix86_avx_u128_mode_entry (void)
16827 tree arg;
16829 /* Entry mode is set to AVX_U128_DIRTY if there are
16830 256bit modes used in function arguments. */
16831 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16832 arg = TREE_CHAIN (arg))
16834 rtx incoming = DECL_INCOMING_RTL (arg);
16836 if (incoming && ix86_check_avx256_register (incoming))
16837 return AVX_U128_DIRTY;
16840 return AVX_U128_CLEAN;
16843 /* Return a mode that ENTITY is assumed to be
16844 switched to at function entry. */
16846 static int
16847 ix86_mode_entry (int entity)
16849 switch (entity)
16851 case AVX_U128:
16852 return ix86_avx_u128_mode_entry ();
16853 case I387_TRUNC:
16854 case I387_FLOOR:
16855 case I387_CEIL:
16856 case I387_MASK_PM:
16857 return I387_CW_ANY;
16858 default:
16859 gcc_unreachable ();
16863 static int
16864 ix86_avx_u128_mode_exit (void)
16866 rtx reg = crtl->return_rtx;
16868 /* Exit mode is set to AVX_U128_DIRTY if there are
16869 256bit modes used in the function return register. */
16870 if (reg && ix86_check_avx256_register (reg))
16871 return AVX_U128_DIRTY;
16873 return AVX_U128_CLEAN;
16876 /* Return a mode that ENTITY is assumed to be
16877 switched to at function exit. */
16879 static int
16880 ix86_mode_exit (int entity)
16882 switch (entity)
16884 case AVX_U128:
16885 return ix86_avx_u128_mode_exit ();
16886 case I387_TRUNC:
16887 case I387_FLOOR:
16888 case I387_CEIL:
16889 case I387_MASK_PM:
16890 return I387_CW_ANY;
16891 default:
16892 gcc_unreachable ();
16896 static int
16897 ix86_mode_priority (int, int n)
16899 return n;
16902 /* Output code to initialize control word copies used by trunc?f?i and
16903 rounding patterns. CURRENT_MODE is set to current control word,
16904 while NEW_MODE is set to new control word. */
16906 static void
16907 emit_i387_cw_initialization (int mode)
16909 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16910 rtx new_mode;
16912 enum ix86_stack_slot slot;
16914 rtx reg = gen_reg_rtx (HImode);
16916 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16917 emit_move_insn (reg, copy_rtx (stored_mode));
16919 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16920 || optimize_insn_for_size_p ())
16922 switch (mode)
16924 case I387_CW_TRUNC:
16925 /* round toward zero (truncate) */
16926 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16927 slot = SLOT_CW_TRUNC;
16928 break;
16930 case I387_CW_FLOOR:
16931 /* round down toward -oo */
16932 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16933 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16934 slot = SLOT_CW_FLOOR;
16935 break;
16937 case I387_CW_CEIL:
16938 /* round up toward +oo */
16939 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16940 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16941 slot = SLOT_CW_CEIL;
16942 break;
16944 case I387_CW_MASK_PM:
16945 /* mask precision exception for nearbyint() */
16946 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16947 slot = SLOT_CW_MASK_PM;
16948 break;
16950 default:
16951 gcc_unreachable ();
16954 else
16956 switch (mode)
16958 case I387_CW_TRUNC:
16959 /* round toward zero (truncate) */
16960 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16961 slot = SLOT_CW_TRUNC;
16962 break;
16964 case I387_CW_FLOOR:
16965 /* round down toward -oo */
16966 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16967 slot = SLOT_CW_FLOOR;
16968 break;
16970 case I387_CW_CEIL:
16971 /* round up toward +oo */
16972 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16973 slot = SLOT_CW_CEIL;
16974 break;
16976 case I387_CW_MASK_PM:
16977 /* mask precision exception for nearbyint() */
16978 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16979 slot = SLOT_CW_MASK_PM;
16980 break;
16982 default:
16983 gcc_unreachable ();
16987 gcc_assert (slot < MAX_386_STACK_LOCALS);
16989 new_mode = assign_386_stack_local (HImode, slot);
16990 emit_move_insn (new_mode, reg);
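/* Editor's note: an illustrative, standalone sketch (not part of GCC) of the
   control-word arithmetic emitted above.  Bits 10-11 (mask 0x0c00) select the
   x87 rounding mode and bit 5 (0x0020) masks the precision exception; 0x037f
   is the power-on/FNINIT control word, used here only as a sample input.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned short cw = 0x037f;
  unsigned short cw_trunc   = cw | 0x0c00;                /* toward zero   */
  unsigned short cw_floor   = (cw & ~0x0c00) | 0x0400;    /* toward -inf   */
  unsigned short cw_ceil    = (cw & ~0x0c00) | 0x0800;    /* toward +inf   */
  unsigned short cw_mask_pm = cw | 0x0020;                /* mask PE       */
  printf ("%04x %04x %04x %04x\n", cw_trunc, cw_floor, cw_ceil, cw_mask_pm);
  return 0;
}
#endif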
16993 /* Emit vzeroupper. */
16995 void
16996 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16998 int i;
17000 /* Cancel automatic vzeroupper insertion if there are
17001 live call-saved SSE registers at the insertion point. */
17003 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17004 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17005 return;
17007 if (TARGET_64BIT)
17008 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17009 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17010 return;
17012 emit_insn (gen_avx_vzeroupper ());
17017 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE
17018 is the set of hard registers live at the point where the insn(s)
17019 are to be inserted. */
17021 static void
17022 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
17023 HARD_REG_SET regs_live)
17025 switch (entity)
17027 case AVX_U128:
17028 if (mode == AVX_U128_CLEAN)
17029 ix86_avx_emit_vzeroupper (regs_live);
17030 break;
17031 case I387_TRUNC:
17032 case I387_FLOOR:
17033 case I387_CEIL:
17034 case I387_MASK_PM:
17035 if (mode != I387_CW_ANY
17036 && mode != I387_CW_UNINITIALIZED)
17037 emit_i387_cw_initialization (mode);
17038 break;
17039 default:
17040 gcc_unreachable ();
17044 /* Output code for INSN to convert a float to a signed int. OPERANDS
17045 are the insn operands. The output may be [HSD]Imode and the input
17046 operand may be [SDX]Fmode. */
17048 const char *
17049 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
17051 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17052 int dimode_p = GET_MODE (operands[0]) == DImode;
17053 int round_mode = get_attr_i387_cw (insn);
17055 /* Jump through a hoop or two for DImode, since the hardware has no
17056 non-popping instruction. We used to do this a different way, but
17057 that was somewhat fragile and broke with post-reload splitters. */
17058 if ((dimode_p || fisttp) && !stack_top_dies)
17059 output_asm_insn ("fld\t%y1", operands);
17061 gcc_assert (STACK_TOP_P (operands[1]));
17062 gcc_assert (MEM_P (operands[0]));
17063 gcc_assert (GET_MODE (operands[1]) != TFmode);
17065 if (fisttp)
17066 output_asm_insn ("fisttp%Z0\t%0", operands);
17067 else
17069 if (round_mode != I387_CW_ANY)
17070 output_asm_insn ("fldcw\t%3", operands);
17071 if (stack_top_dies || dimode_p)
17072 output_asm_insn ("fistp%Z0\t%0", operands);
17073 else
17074 output_asm_insn ("fist%Z0\t%0", operands);
17075 if (round_mode != I387_CW_ANY)
17076 output_asm_insn ("fldcw\t%2", operands);
17079 return "";
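/* Editor's note: an illustrative, standalone sketch (not part of GCC) of why
   the control word is switched above: C's float-to-integer conversion must
   truncate toward zero, while the x87 default rounding mode (what a plain
   fist/fistp would use) is round-to-nearest.  fisttp avoids the switch by
   always truncating.  */
#if 0
#include <math.h>
#include <stdio.h>

int
main (void)
{
  double x = 2.7;
  printf ("(long) x = %ld\n", (long) x);    /* 2: truncation toward zero     */
  printf ("rint (x) = %.0f\n", rint (x));   /* 3: current (nearest) rounding */
  return 0;
}
#endif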
17082 /* Output code for x87 ffreep insn. The OPNO argument, which may only
17083 have the values zero or one, indicates the ffreep insn's operand
17084 from the OPERANDS array. */
17086 static const char *
17087 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
17089 if (TARGET_USE_FFREEP)
17090 #ifdef HAVE_AS_IX86_FFREEP
17091 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
17092 #else
17094 static char retval[32];
17095 int regno = REGNO (operands[opno]);
17097 gcc_assert (STACK_REGNO_P (regno));
17099 regno -= FIRST_STACK_REG;
17101 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
17102 return retval;
17104 #endif
17106 return opno ? "fstp\t%y1" : "fstp\t%y0";
17110 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
17111 should be used. UNORDERED_P is true when fucom should be used. */
17113 const char *
17114 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17116 int stack_top_dies;
17117 rtx cmp_op0, cmp_op1;
17118 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17120 if (eflags_p)
17122 cmp_op0 = operands[0];
17123 cmp_op1 = operands[1];
17125 else
17127 cmp_op0 = operands[1];
17128 cmp_op1 = operands[2];
17131 if (is_sse)
17133 if (GET_MODE (operands[0]) == SFmode)
17134 if (unordered_p)
17135 return "%vucomiss\t{%1, %0|%0, %1}";
17136 else
17137 return "%vcomiss\t{%1, %0|%0, %1}";
17138 else
17139 if (unordered_p)
17140 return "%vucomisd\t{%1, %0|%0, %1}";
17141 else
17142 return "%vcomisd\t{%1, %0|%0, %1}";
17145 gcc_assert (STACK_TOP_P (cmp_op0));
17147 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17149 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17151 if (stack_top_dies)
17153 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17154 return output_387_ffreep (operands, 1);
17156 else
17157 return "ftst\n\tfnstsw\t%0";
17160 if (STACK_REG_P (cmp_op1)
17161 && stack_top_dies
17162 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17163 && REGNO (cmp_op1) != FIRST_STACK_REG)
17165 /* If the top of the 387 stack dies, and the other operand is also
17166 a stack register that dies, then this must be an
17167 `fcompp' float compare. */
17169 if (eflags_p)
17171 /* There is no double popping fcomi variant. Fortunately,
17172 eflags is immune from the fstp's cc clobbering. */
17173 if (unordered_p)
17174 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17175 else
17176 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17177 return output_387_ffreep (operands, 0);
17179 else
17181 if (unordered_p)
17182 return "fucompp\n\tfnstsw\t%0";
17183 else
17184 return "fcompp\n\tfnstsw\t%0";
17187 else
17189 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
17191 static const char * const alt[16] =
17193 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17194 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17195 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17196 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17198 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17199 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17200 NULL,
17201 NULL,
17203 "fcomi\t{%y1, %0|%0, %y1}",
17204 "fcomip\t{%y1, %0|%0, %y1}",
17205 "fucomi\t{%y1, %0|%0, %y1}",
17206 "fucomip\t{%y1, %0|%0, %y1}",
17208 NULL,
17209 NULL,
17210 NULL,
17211 NULL
17214 int mask;
17215 const char *ret;
17217 mask = eflags_p << 3;
17218 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17219 mask |= unordered_p << 1;
17220 mask |= stack_top_dies;
17222 gcc_assert (mask < 16);
17223 ret = alt[mask];
17224 gcc_assert (ret);
17226 return ret;
17230 void
17231 ix86_output_addr_vec_elt (FILE *file, int value)
17233 const char *directive = ASM_LONG;
17235 #ifdef ASM_QUAD
17236 if (TARGET_LP64)
17237 directive = ASM_QUAD;
17238 #else
17239 gcc_assert (!TARGET_64BIT);
17240 #endif
17242 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17245 void
17246 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17248 const char *directive = ASM_LONG;
17250 #ifdef ASM_QUAD
17251 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17252 directive = ASM_QUAD;
17253 #else
17254 gcc_assert (!TARGET_64BIT);
17255 #endif
17256 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17257 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17258 fprintf (file, "%s%s%d-%s%d\n",
17259 directive, LPREFIX, value, LPREFIX, rel);
17260 else if (HAVE_AS_GOTOFF_IN_DATA)
17261 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17262 #if TARGET_MACHO
17263 else if (TARGET_MACHO)
17265 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17266 machopic_output_function_base_name (file);
17267 putc ('\n', file);
17269 #endif
17270 else
17271 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17272 GOT_SYMBOL_NAME, LPREFIX, value);
17275 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17276 for the target. */
17278 void
17279 ix86_expand_clear (rtx dest)
17281 rtx tmp;
17283 /* We play register width games, which are only valid after reload. */
17284 gcc_assert (reload_completed);
17286 /* Avoid HImode and its attendant prefix byte. */
17287 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17288 dest = gen_rtx_REG (SImode, REGNO (dest));
17289 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
17291 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17293 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17294 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17297 emit_insn (tmp);
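/* Editor's note (illustration, not part of GCC): the size difference that
   motivates the xor form.  "xor %eax, %eax" encodes in 2 bytes (31 c0),
   while "mov $0, %eax" needs 5 bytes (b8 00 00 00 00); the xor, however,
   clobbers the flags, which is why the PARALLEL above adds a CLOBBER of
   FLAGS_REG.  */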
17300 /* X is an unchanging MEM. If it is a constant pool reference, return
17301 the constant pool rtx, else NULL. */
17304 maybe_get_pool_constant (rtx x)
17306 x = ix86_delegitimize_address (XEXP (x, 0));
17308 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17309 return get_pool_constant (x);
17311 return NULL_RTX;
17314 void
17315 ix86_expand_move (machine_mode mode, rtx operands[])
17317 rtx op0, op1;
17318 enum tls_model model;
17320 op0 = operands[0];
17321 op1 = operands[1];
17323 if (GET_CODE (op1) == SYMBOL_REF)
17325 rtx tmp;
17327 model = SYMBOL_REF_TLS_MODEL (op1);
17328 if (model)
17330 op1 = legitimize_tls_address (op1, model, true);
17331 op1 = force_operand (op1, op0);
17332 if (op1 == op0)
17333 return;
17334 op1 = convert_to_mode (mode, op1, 1);
17336 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17337 op1 = tmp;
17339 else if (GET_CODE (op1) == CONST
17340 && GET_CODE (XEXP (op1, 0)) == PLUS
17341 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17343 rtx addend = XEXP (XEXP (op1, 0), 1);
17344 rtx symbol = XEXP (XEXP (op1, 0), 0);
17345 rtx tmp;
17347 model = SYMBOL_REF_TLS_MODEL (symbol);
17348 if (model)
17349 tmp = legitimize_tls_address (symbol, model, true);
17350 else
17351 tmp = legitimize_pe_coff_symbol (symbol, true);
17353 if (tmp)
17355 tmp = force_operand (tmp, NULL);
17356 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17357 op0, 1, OPTAB_DIRECT);
17358 if (tmp == op0)
17359 return;
17360 op1 = convert_to_mode (mode, tmp, 1);
17364 if ((flag_pic || MACHOPIC_INDIRECT)
17365 && symbolic_operand (op1, mode))
17367 if (TARGET_MACHO && !TARGET_64BIT)
17369 #if TARGET_MACHO
17370 /* dynamic-no-pic */
17371 if (MACHOPIC_INDIRECT)
17373 rtx temp = ((reload_in_progress
17374 || ((op0 && REG_P (op0))
17375 && mode == Pmode))
17376 ? op0 : gen_reg_rtx (Pmode));
17377 op1 = machopic_indirect_data_reference (op1, temp);
17378 if (MACHOPIC_PURE)
17379 op1 = machopic_legitimize_pic_address (op1, mode,
17380 temp == op1 ? 0 : temp);
17382 if (op0 != op1 && GET_CODE (op0) != MEM)
17384 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
17385 emit_insn (insn);
17386 return;
17388 if (GET_CODE (op0) == MEM)
17389 op1 = force_reg (Pmode, op1);
17390 else
17392 rtx temp = op0;
17393 if (GET_CODE (temp) != REG)
17394 temp = gen_reg_rtx (Pmode);
17395 temp = legitimize_pic_address (op1, temp);
17396 if (temp == op0)
17397 return;
17398 op1 = temp;
17400 /* dynamic-no-pic */
17401 #endif
17403 else
17405 if (MEM_P (op0))
17406 op1 = force_reg (mode, op1);
17407 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17409 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17410 op1 = legitimize_pic_address (op1, reg);
17411 if (op0 == op1)
17412 return;
17413 op1 = convert_to_mode (mode, op1, 1);
17417 else
17419 if (MEM_P (op0)
17420 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17421 || !push_operand (op0, mode))
17422 && MEM_P (op1))
17423 op1 = force_reg (mode, op1);
17425 if (push_operand (op0, mode)
17426 && ! general_no_elim_operand (op1, mode))
17427 op1 = copy_to_mode_reg (mode, op1);
17429 /* Force large constants in 64bit compilation into register
17430 to get them CSEed. */
17431 if (can_create_pseudo_p ()
17432 && (mode == DImode) && TARGET_64BIT
17433 && immediate_operand (op1, mode)
17434 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17435 && !register_operand (op0, mode)
17436 && optimize)
17437 op1 = copy_to_mode_reg (mode, op1);
17439 if (can_create_pseudo_p ()
17440 && FLOAT_MODE_P (mode)
17441 && GET_CODE (op1) == CONST_DOUBLE)
17443 /* If we are loading a floating point constant to a register,
17444 force the value to memory now, since we'll get better code
17445 out of the back end. */
17447 op1 = validize_mem (force_const_mem (mode, op1));
17448 if (!register_operand (op0, mode))
17450 rtx temp = gen_reg_rtx (mode);
17451 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
17452 emit_move_insn (op0, temp);
17453 return;
17458 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17461 void
17462 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17464 rtx op0 = operands[0], op1 = operands[1];
17465 unsigned int align = GET_MODE_ALIGNMENT (mode);
17467 if (push_operand (op0, VOIDmode))
17468 op0 = emit_move_resolve_push (mode, op0);
17470 /* Force constants other than zero into memory. We do not know how
17471 the instructions used to build constants modify the upper 64 bits
17472 of the register; once we have that information we may be able
17473 to handle some of them more efficiently. */
17474 if (can_create_pseudo_p ()
17475 && register_operand (op0, mode)
17476 && (CONSTANT_P (op1)
17477 || (GET_CODE (op1) == SUBREG
17478 && CONSTANT_P (SUBREG_REG (op1))))
17479 && !standard_sse_constant_p (op1))
17480 op1 = validize_mem (force_const_mem (mode, op1));
17482 /* We need to check memory alignment for SSE mode since attribute
17483 can make operands unaligned. */
17484 if (can_create_pseudo_p ()
17485 && SSE_REG_MODE_P (mode)
17486 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17487 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17489 rtx tmp[2];
17491 /* ix86_expand_vector_move_misalign() does not like constants ... */
17492 if (CONSTANT_P (op1)
17493 || (GET_CODE (op1) == SUBREG
17494 && CONSTANT_P (SUBREG_REG (op1))))
17495 op1 = validize_mem (force_const_mem (mode, op1));
17497 /* ... nor both arguments in memory. */
17498 if (!register_operand (op0, mode)
17499 && !register_operand (op1, mode))
17500 op1 = force_reg (mode, op1);
17502 tmp[0] = op0; tmp[1] = op1;
17503 ix86_expand_vector_move_misalign (mode, tmp);
17504 return;
17507 /* Make operand1 a register if it isn't already. */
17508 if (can_create_pseudo_p ()
17509 && !register_operand (op0, mode)
17510 && !register_operand (op1, mode))
17512 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17513 return;
17516 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17519 /* Split 32-byte AVX unaligned load and store if needed. */
17521 static void
17522 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17524 rtx m;
17525 rtx (*extract) (rtx, rtx, rtx);
17526 rtx (*load_unaligned) (rtx, rtx);
17527 rtx (*store_unaligned) (rtx, rtx);
17528 machine_mode mode;
17530 switch (GET_MODE (op0))
17532 default:
17533 gcc_unreachable ();
17534 case V32QImode:
17535 extract = gen_avx_vextractf128v32qi;
17536 load_unaligned = gen_avx_loaddquv32qi;
17537 store_unaligned = gen_avx_storedquv32qi;
17538 mode = V16QImode;
17539 break;
17540 case V8SFmode:
17541 extract = gen_avx_vextractf128v8sf;
17542 load_unaligned = gen_avx_loadups256;
17543 store_unaligned = gen_avx_storeups256;
17544 mode = V4SFmode;
17545 break;
17546 case V4DFmode:
17547 extract = gen_avx_vextractf128v4df;
17548 load_unaligned = gen_avx_loadupd256;
17549 store_unaligned = gen_avx_storeupd256;
17550 mode = V2DFmode;
17551 break;
17554 if (MEM_P (op1))
17556 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
17557 && optimize_insn_for_speed_p ())
17559 rtx r = gen_reg_rtx (mode);
17560 m = adjust_address (op1, mode, 0);
17561 emit_move_insn (r, m);
17562 m = adjust_address (op1, mode, 16);
17563 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17564 emit_move_insn (op0, r);
17566 /* Normal *mov<mode>_internal pattern will handle
17567 unaligned loads just fine if misaligned_operand
17568 is true, and without the UNSPEC it can be combined
17569 with arithmetic instructions. */
17570 else if (misaligned_operand (op1, GET_MODE (op1)))
17571 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17572 else
17573 emit_insn (load_unaligned (op0, op1));
17575 else if (MEM_P (op0))
17577 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
17578 && optimize_insn_for_speed_p ())
17580 m = adjust_address (op0, mode, 0);
17581 emit_insn (extract (m, op1, const0_rtx));
17582 m = adjust_address (op0, mode, 16);
17583 emit_insn (extract (m, op1, const1_rtx));
17585 else
17586 emit_insn (store_unaligned (op0, op1));
17588 else
17589 gcc_unreachable ();
17592 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17593 straight to ix86_expand_vector_move. */
17594 /* Code generation for scalar reg-reg moves of single and double precision data:
17595 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17596 movaps reg, reg
17597 else
17598 movss reg, reg
17599 if (x86_sse_partial_reg_dependency == true)
17600 movapd reg, reg
17601 else
17602 movsd reg, reg
17604 Code generation for scalar loads of double precision data:
17605 if (x86_sse_split_regs == true)
17606 movlpd mem, reg (gas syntax)
17607 else
17608 movsd mem, reg
17610 Code generation for unaligned packed loads of single precision data
17611 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17612 if (x86_sse_unaligned_move_optimal)
17613 movups mem, reg
17615 if (x86_sse_partial_reg_dependency == true)
17617 xorps reg, reg
17618 movlps mem, reg
17619 movhps mem+8, reg
17621 else
17623 movlps mem, reg
17624 movhps mem+8, reg
17627 Code generation for unaligned packed loads of double precision data
17628 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17629 if (x86_sse_unaligned_move_optimal)
17630 movupd mem, reg
17632 if (x86_sse_split_regs == true)
17634 movlpd mem, reg
17635 movhpd mem+8, reg
17637 else
17639 movsd mem, reg
17640 movhpd mem+8, reg
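/* Editor's note: an illustrative, standalone sketch (not part of GCC) of the
   two single-precision unaligned-load strategies listed above, written with
   SSE intrinsics instead of RTL.  The function names are invented for the
   example; it needs at least -msse on an x86 target.  */
#if 0
#include <xmmintrin.h>

__m128
load_unaligned_movups (const float *p)
{
  return _mm_loadu_ps (p);                        /* movups mem, reg        */
}

__m128
load_unaligned_split (const float *p)
{
  __m128 v = _mm_setzero_ps ();                   /* xorps reg, reg         */
  v = _mm_loadl_pi (v, (const __m64 *) p);        /* movlps mem, reg        */
  v = _mm_loadh_pi (v, (const __m64 *) (p + 2));  /* movhps mem+8, reg      */
  return v;
}
#endif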
17644 void
17645 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17647 rtx op0, op1, orig_op0 = NULL_RTX, m;
17648 rtx (*load_unaligned) (rtx, rtx);
17649 rtx (*store_unaligned) (rtx, rtx);
17651 op0 = operands[0];
17652 op1 = operands[1];
17654 if (GET_MODE_SIZE (mode) == 64)
17656 switch (GET_MODE_CLASS (mode))
17658 case MODE_VECTOR_INT:
17659 case MODE_INT:
17660 if (GET_MODE (op0) != V16SImode)
17662 if (!MEM_P (op0))
17664 orig_op0 = op0;
17665 op0 = gen_reg_rtx (V16SImode);
17667 else
17668 op0 = gen_lowpart (V16SImode, op0);
17670 op1 = gen_lowpart (V16SImode, op1);
17671 /* FALLTHRU */
17673 case MODE_VECTOR_FLOAT:
17674 switch (GET_MODE (op0))
17676 default:
17677 gcc_unreachable ();
17678 case V16SImode:
17679 load_unaligned = gen_avx512f_loaddquv16si;
17680 store_unaligned = gen_avx512f_storedquv16si;
17681 break;
17682 case V16SFmode:
17683 load_unaligned = gen_avx512f_loadups512;
17684 store_unaligned = gen_avx512f_storeups512;
17685 break;
17686 case V8DFmode:
17687 load_unaligned = gen_avx512f_loadupd512;
17688 store_unaligned = gen_avx512f_storeupd512;
17689 break;
17692 if (MEM_P (op1))
17693 emit_insn (load_unaligned (op0, op1));
17694 else if (MEM_P (op0))
17695 emit_insn (store_unaligned (op0, op1));
17696 else
17697 gcc_unreachable ();
17698 if (orig_op0)
17699 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17700 break;
17702 default:
17703 gcc_unreachable ();
17706 return;
17709 if (TARGET_AVX
17710 && GET_MODE_SIZE (mode) == 32)
17712 switch (GET_MODE_CLASS (mode))
17714 case MODE_VECTOR_INT:
17715 case MODE_INT:
17716 if (GET_MODE (op0) != V32QImode)
17718 if (!MEM_P (op0))
17720 orig_op0 = op0;
17721 op0 = gen_reg_rtx (V32QImode);
17723 else
17724 op0 = gen_lowpart (V32QImode, op0);
17726 op1 = gen_lowpart (V32QImode, op1);
17727 /* FALLTHRU */
17729 case MODE_VECTOR_FLOAT:
17730 ix86_avx256_split_vector_move_misalign (op0, op1);
17731 if (orig_op0)
17732 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17733 break;
17735 default:
17736 gcc_unreachable ();
17739 return;
17742 if (MEM_P (op1))
17744 /* Normal *mov<mode>_internal pattern will handle
17745 unaligned loads just fine if misaligned_operand
17746 is true, and without the UNSPEC it can be combined
17747 with arithmetic instructions. */
17748 if (TARGET_AVX
17749 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17750 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17751 && misaligned_operand (op1, GET_MODE (op1)))
17752 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17753 /* ??? If we have typed data, then it would appear that using
17754 movdqu is the only way to get unaligned data loaded with
17755 integer type. */
17756 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17758 if (GET_MODE (op0) != V16QImode)
17760 orig_op0 = op0;
17761 op0 = gen_reg_rtx (V16QImode);
17763 op1 = gen_lowpart (V16QImode, op1);
17764 /* We will eventually emit movups based on insn attributes. */
17765 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17766 if (orig_op0)
17767 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17769 else if (TARGET_SSE2 && mode == V2DFmode)
17771 rtx zero;
17773 if (TARGET_AVX
17774 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17775 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17776 || optimize_insn_for_size_p ())
17778 /* We will eventually emit movups based on insn attributes. */
17779 emit_insn (gen_sse2_loadupd (op0, op1));
17780 return;
17783 /* When SSE registers are split into halves, we can avoid
17784 writing to the top half twice. */
17785 if (TARGET_SSE_SPLIT_REGS)
17787 emit_clobber (op0);
17788 zero = op0;
17790 else
17792 /* ??? Not sure about the best option for the Intel chips.
17793 The following would seem to satisfy; the register is
17794 entirely cleared, breaking the dependency chain. We
17795 then store to the upper half, with a dependency depth
17796 of one. A rumor has it that Intel recommends two movsd
17797 followed by an unpacklpd, but this is unconfirmed. And
17798 given that the dependency depth of the unpacklpd would
17799 still be one, I'm not sure why this would be better. */
17800 zero = CONST0_RTX (V2DFmode);
17803 m = adjust_address (op1, DFmode, 0);
17804 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17805 m = adjust_address (op1, DFmode, 8);
17806 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17808 else
17810 rtx t;
17812 if (TARGET_AVX
17813 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17814 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17815 || optimize_insn_for_size_p ())
17817 if (GET_MODE (op0) != V4SFmode)
17819 orig_op0 = op0;
17820 op0 = gen_reg_rtx (V4SFmode);
17822 op1 = gen_lowpart (V4SFmode, op1);
17823 emit_insn (gen_sse_loadups (op0, op1));
17824 if (orig_op0)
17825 emit_move_insn (orig_op0,
17826 gen_lowpart (GET_MODE (orig_op0), op0));
17827 return;
17830 if (mode != V4SFmode)
17831 t = gen_reg_rtx (V4SFmode);
17832 else
17833 t = op0;
17835 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17836 emit_move_insn (t, CONST0_RTX (V4SFmode));
17837 else
17838 emit_clobber (t);
17840 m = adjust_address (op1, V2SFmode, 0);
17841 emit_insn (gen_sse_loadlps (t, t, m));
17842 m = adjust_address (op1, V2SFmode, 8);
17843 emit_insn (gen_sse_loadhps (t, t, m));
17844 if (mode != V4SFmode)
17845 emit_move_insn (op0, gen_lowpart (mode, t));
17848 else if (MEM_P (op0))
17850 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17852 op0 = gen_lowpart (V16QImode, op0);
17853 op1 = gen_lowpart (V16QImode, op1);
17854 /* We will eventually emit movups based on insn attributes. */
17855 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17857 else if (TARGET_SSE2 && mode == V2DFmode)
17859 if (TARGET_AVX
17860 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17861 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17862 || optimize_insn_for_size_p ())
17863 /* We will eventually emit movups based on insn attributes. */
17864 emit_insn (gen_sse2_storeupd (op0, op1));
17865 else
17867 m = adjust_address (op0, DFmode, 0);
17868 emit_insn (gen_sse2_storelpd (m, op1));
17869 m = adjust_address (op0, DFmode, 8);
17870 emit_insn (gen_sse2_storehpd (m, op1));
17873 else
17875 if (mode != V4SFmode)
17876 op1 = gen_lowpart (V4SFmode, op1);
17878 if (TARGET_AVX
17879 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17880 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17881 || optimize_insn_for_size_p ())
17883 op0 = gen_lowpart (V4SFmode, op0);
17884 emit_insn (gen_sse_storeups (op0, op1));
17886 else
17888 m = adjust_address (op0, V2SFmode, 0);
17889 emit_insn (gen_sse_storelps (m, op1));
17890 m = adjust_address (op0, V2SFmode, 8);
17891 emit_insn (gen_sse_storehps (m, op1));
17895 else
17896 gcc_unreachable ();
17899 /* Helper function of ix86_fixup_binary_operands to canonicalize
17900 operand order. Returns true if the operands should be swapped. */
17902 static bool
17903 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17904 rtx operands[])
17906 rtx dst = operands[0];
17907 rtx src1 = operands[1];
17908 rtx src2 = operands[2];
17910 /* If the operation is not commutative, we can't do anything. */
17911 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17912 return false;
17914 /* Highest priority is that src1 should match dst. */
17915 if (rtx_equal_p (dst, src1))
17916 return false;
17917 if (rtx_equal_p (dst, src2))
17918 return true;
17920 /* Next highest priority is that immediate constants come second. */
17921 if (immediate_operand (src2, mode))
17922 return false;
17923 if (immediate_operand (src1, mode))
17924 return true;
17926 /* Lowest priority is that memory references should come second. */
17927 if (MEM_P (src2))
17928 return false;
17929 if (MEM_P (src1))
17930 return true;
17932 return false;
17936 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17937 destination to use for the operation. If different from the true
17938 destination in operands[0], a copy operation will be required. */
17941 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
17942 rtx operands[])
17944 rtx dst = operands[0];
17945 rtx src1 = operands[1];
17946 rtx src2 = operands[2];
17948 /* Canonicalize operand order. */
17949 if (ix86_swap_binary_operands_p (code, mode, operands))
17951 /* It is invalid to swap operands of different modes. */
17952 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17954 std::swap (src1, src2);
17957 /* Both source operands cannot be in memory. */
17958 if (MEM_P (src1) && MEM_P (src2))
17960 /* Optimization: Only read from memory once. */
17961 if (rtx_equal_p (src1, src2))
17963 src2 = force_reg (mode, src2);
17964 src1 = src2;
17966 else if (rtx_equal_p (dst, src1))
17967 src2 = force_reg (mode, src2);
17968 else
17969 src1 = force_reg (mode, src1);
17972 /* If the destination is memory, and we do not have matching source
17973 operands, do things in registers. */
17974 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17975 dst = gen_reg_rtx (mode);
17977 /* Source 1 cannot be a constant. */
17978 if (CONSTANT_P (src1))
17979 src1 = force_reg (mode, src1);
17981 /* Source 1 cannot be a non-matching memory. */
17982 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17983 src1 = force_reg (mode, src1);
17985 /* Improve address combine. */
17986 if (code == PLUS
17987 && GET_MODE_CLASS (mode) == MODE_INT
17988 && MEM_P (src2))
17989 src2 = force_reg (mode, src2);
17991 operands[1] = src1;
17992 operands[2] = src2;
17993 return dst;
17996 /* Similarly, but assume that the destination has already been
17997 set up properly. */
17999 void
18000 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
18001 machine_mode mode, rtx operands[])
18003 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
18004 gcc_assert (dst == operands[0]);
18007 /* Attempt to expand a binary operator. Make the expansion closer to the
18008 actual machine than just using general_operand, which would allow 3 separate
18009 memory references (one output, two input) in a single insn. */
18011 void
18012 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
18013 rtx operands[])
18015 rtx src1, src2, dst, op, clob;
18017 dst = ix86_fixup_binary_operands (code, mode, operands);
18018 src1 = operands[1];
18019 src2 = operands[2];
18021 /* Emit the instruction. */
18023 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
18024 if (reload_in_progress)
18026 /* Reload doesn't know about the flags register, and doesn't know that
18027 it doesn't want to clobber it. We can only do this with PLUS. */
18028 gcc_assert (code == PLUS);
18029 emit_insn (op);
18031 else if (reload_completed
18032 && code == PLUS
18033 && !rtx_equal_p (dst, src1))
18035 /* This is going to be an LEA; avoid splitting it later. */
18036 emit_insn (op);
18038 else
18040 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18041 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18044 /* Fix up the destination if needed. */
18045 if (dst != operands[0])
18046 emit_move_insn (operands[0], dst);
18049 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
18050 the given OPERANDS. */
18052 void
18053 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
18054 rtx operands[])
18056 rtx op1 = NULL_RTX, op2 = NULL_RTX;
18057 if (GET_CODE (operands[1]) == SUBREG)
18059 op1 = operands[1];
18060 op2 = operands[2];
18062 else if (GET_CODE (operands[2]) == SUBREG)
18064 op1 = operands[2];
18065 op2 = operands[1];
18067 /* Optimize (__m128i) d | (__m128i) e and similar code
18068 when d and e are float vectors into float vector logical
18069 insn. In C/C++ without using intrinsics there is no other way
18070 to express vector logical operation on float vectors than
18071 to cast them temporarily to integer vectors. */
18072 if (op1
18073 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18074 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
18075 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
18076 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
18077 && SUBREG_BYTE (op1) == 0
18078 && (GET_CODE (op2) == CONST_VECTOR
18079 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
18080 && SUBREG_BYTE (op2) == 0))
18081 && can_create_pseudo_p ())
18083 rtx dst;
18084 switch (GET_MODE (SUBREG_REG (op1)))
18086 case V4SFmode:
18087 case V8SFmode:
18088 case V16SFmode:
18089 case V2DFmode:
18090 case V4DFmode:
18091 case V8DFmode:
18092 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
18093 if (GET_CODE (op2) == CONST_VECTOR)
18095 op2 = gen_lowpart (GET_MODE (dst), op2);
18096 op2 = force_reg (GET_MODE (dst), op2);
18098 else
18100 op1 = operands[1];
18101 op2 = SUBREG_REG (operands[2]);
18102 if (!nonimmediate_operand (op2, GET_MODE (dst)))
18103 op2 = force_reg (GET_MODE (dst), op2);
18105 op1 = SUBREG_REG (op1);
18106 if (!nonimmediate_operand (op1, GET_MODE (dst)))
18107 op1 = force_reg (GET_MODE (dst), op1);
18108 emit_insn (gen_rtx_SET (VOIDmode, dst,
18109 gen_rtx_fmt_ee (code, GET_MODE (dst),
18110 op1, op2)));
18111 emit_move_insn (operands[0], gen_lowpart (mode, dst));
18112 return;
18113 default:
18114 break;
18117 if (!nonimmediate_operand (operands[1], mode))
18118 operands[1] = force_reg (mode, operands[1]);
18119 if (!nonimmediate_operand (operands[2], mode))
18120 operands[2] = force_reg (mode, operands[2]);
18121 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18122 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18123 gen_rtx_fmt_ee (code, mode, operands[1],
18124 operands[2])));
18127 /* Return TRUE or FALSE depending on whether the binary operator meets the
18128 appropriate constraints. */
18130 bool
18131 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18132 rtx operands[3])
18134 rtx dst = operands[0];
18135 rtx src1 = operands[1];
18136 rtx src2 = operands[2];
18138 /* Both source operands cannot be in memory. */
18139 if (MEM_P (src1) && MEM_P (src2))
18140 return false;
18142 /* Canonicalize operand order for commutative operators. */
18143 if (ix86_swap_binary_operands_p (code, mode, operands))
18144 std::swap (src1, src2);
18146 /* If the destination is memory, we must have a matching source operand. */
18147 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18148 return false;
18150 /* Source 1 cannot be a constant. */
18151 if (CONSTANT_P (src1))
18152 return false;
18154 /* Source 1 cannot be a non-matching memory. */
18155 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18156 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18157 return (code == AND
18158 && (mode == HImode
18159 || mode == SImode
18160 || (TARGET_64BIT && mode == DImode))
18161 && satisfies_constraint_L (src2));
18163 return true;
18166 /* Attempt to expand a unary operator. Make the expansion closer to the
18167 actual machine than just using general_operand, which would allow 2 separate
18168 memory references (one output, one input) in a single insn. */
18170 void
18171 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18172 rtx operands[])
18174 bool matching_memory = false;
18175 rtx src, dst, op, clob;
18177 dst = operands[0];
18178 src = operands[1];
18180 /* If the destination is memory, and we do not have matching source
18181 operands, do things in registers. */
18182 if (MEM_P (dst))
18184 if (rtx_equal_p (dst, src))
18185 matching_memory = true;
18186 else
18187 dst = gen_reg_rtx (mode);
18190 /* When source operand is memory, destination must match. */
18191 if (MEM_P (src) && !matching_memory)
18192 src = force_reg (mode, src);
18194 /* Emit the instruction. */
18196 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
18197 if (reload_in_progress || code == NOT)
18199 /* Reload doesn't know about the flags register, and doesn't know that
18200 it doesn't want to clobber it. */
18201 gcc_assert (code == NOT);
18202 emit_insn (op);
18204 else
18206 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18207 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18210 /* Fix up the destination if needed. */
18211 if (dst != operands[0])
18212 emit_move_insn (operands[0], dst);
18215 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18216 divisor are within the range [0-255]. */
18218 void
18219 ix86_split_idivmod (machine_mode mode, rtx operands[],
18220 bool signed_p)
18222 rtx_code_label *end_label, *qimode_label;
18223 rtx insn, div, mod;
18224 rtx scratch, tmp0, tmp1, tmp2;
18225 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18226 rtx (*gen_zero_extend) (rtx, rtx);
18227 rtx (*gen_test_ccno_1) (rtx, rtx);
18229 switch (mode)
18231 case SImode:
18232 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18233 gen_test_ccno_1 = gen_testsi_ccno_1;
18234 gen_zero_extend = gen_zero_extendqisi2;
18235 break;
18236 case DImode:
18237 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18238 gen_test_ccno_1 = gen_testdi_ccno_1;
18239 gen_zero_extend = gen_zero_extendqidi2;
18240 break;
18241 default:
18242 gcc_unreachable ();
18245 end_label = gen_label_rtx ();
18246 qimode_label = gen_label_rtx ();
18248 scratch = gen_reg_rtx (mode);
18250 /* Use 8bit unsigned divmod if dividend and divisor are within
18251 the range [0-255]. */
18252 emit_move_insn (scratch, operands[2]);
18253 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18254 scratch, 1, OPTAB_DIRECT);
18255 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18256 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18257 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18258 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18259 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18260 pc_rtx);
18261 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
18262 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18263 JUMP_LABEL (insn) = qimode_label;
18265 /* Generate the original signed/unsigned divmod. */
18266 div = gen_divmod4_1 (operands[0], operands[1],
18267 operands[2], operands[3]);
18268 emit_insn (div);
18270 /* Branch to the end. */
18271 emit_jump_insn (gen_jump (end_label));
18272 emit_barrier ();
18274 /* Generate 8bit unsigned divide. */
18275 emit_label (qimode_label);
18276 /* Don't use operands[0] for result of 8bit divide since not all
18277 registers support QImode ZERO_EXTRACT. */
18278 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18279 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18280 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18281 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18283 if (signed_p)
18285 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18286 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18288 else
18290 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18291 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18294 /* Extract remainder from AH. */
18295 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18296 if (REG_P (operands[1]))
18297 insn = emit_move_insn (operands[1], tmp1);
18298 else
18300 /* Need a new scratch register since the old one has result
18301 of 8bit divide. */
18302 scratch = gen_reg_rtx (mode);
18303 emit_move_insn (scratch, tmp1);
18304 insn = emit_move_insn (operands[1], scratch);
18306 set_unique_reg_note (insn, REG_EQUAL, mod);
18308 /* Zero extend quotient from AL. */
18309 tmp1 = gen_lowpart (QImode, tmp0);
18310 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18311 set_unique_reg_note (insn, REG_EQUAL, div);
18313 emit_label (end_label);
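/* Editor's note: an illustrative, standalone sketch (not part of GCC) of the
   run-time test and fast path emitted above, written in plain C for the
   32-bit unsigned case.  When both operands fit in [0, 255] a single 8-bit
   divide yields the quotient (AL) and the remainder (AH) at once.  */
#if 0
#include <stdint.h>

void
divmod_sketch (uint32_t a, uint32_t b, uint32_t *quo, uint32_t *rem)
{
  if (((a | b) & ~0xffu) == 0)        /* mirrors the test against -0x100   */
    {
      *quo = (uint8_t) (a / b);       /* 8-bit unsigned divide             */
      *rem = (uint8_t) (a % b);
    }
  else
    {
      *quo = a / b;                   /* full-width divide                 */
      *rem = a % b;
    }
}
#endif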
18316 #define LEA_MAX_STALL (3)
18317 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18319 /* Increase given DISTANCE in half-cycles according to
18320 dependencies between PREV and NEXT instructions.
18321 Add 1 half-cycle if there is no dependency and
18322 go to the next cycle if there is some dependency. */
18324 static unsigned int
18325 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18327 df_ref def, use;
18329 if (!prev || !next)
18330 return distance + (distance & 1) + 2;
18332 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18333 return distance + 1;
18335 FOR_EACH_INSN_USE (use, next)
18336 FOR_EACH_INSN_DEF (def, prev)
18337 if (!DF_REF_IS_ARTIFICIAL (def)
18338 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18339 return distance + (distance & 1) + 2;
18341 return distance + 1;
18344 /* Check whether instruction INSN defines register number
18345 REGNO1 or REGNO2. */
18347 static bool
18348 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18349 rtx insn)
18351 df_ref def;
18353 FOR_EACH_INSN_DEF (def, insn)
18354 if (DF_REF_REG_DEF_P (def)
18355 && !DF_REF_IS_ARTIFICIAL (def)
18356 && (regno1 == DF_REF_REGNO (def)
18357 || regno2 == DF_REF_REGNO (def)))
18358 return true;
18360 return false;
18363 /* Check whether instruction INSN uses register number
18364 REGNO as part of an address expression. */
18366 static bool
18367 insn_uses_reg_mem (unsigned int regno, rtx insn)
18369 df_ref use;
18371 FOR_EACH_INSN_USE (use, insn)
18372 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18373 return true;
18375 return false;
18378 /* Search backward for non-agu definition of register number REGNO1
18379 or register number REGNO2 in basic block starting from instruction
18380 START up to head of basic block or instruction INSN.
18382 The function sets *FOUND to true if a definition was found
18383 and to false otherwise.
18385 The distance in half-cycles between START and the found instruction,
18386 or the head of the BB, is added to DISTANCE and returned. */
18388 static int
18389 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18390 rtx_insn *insn, int distance,
18391 rtx_insn *start, bool *found)
18393 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18394 rtx_insn *prev = start;
18395 rtx_insn *next = NULL;
18397 *found = false;
18399 while (prev
18400 && prev != insn
18401 && distance < LEA_SEARCH_THRESHOLD)
18403 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18405 distance = increase_distance (prev, next, distance);
18406 if (insn_defines_reg (regno1, regno2, prev))
18408 if (recog_memoized (prev) < 0
18409 || get_attr_type (prev) != TYPE_LEA)
18411 *found = true;
18412 return distance;
18416 next = prev;
18418 if (prev == BB_HEAD (bb))
18419 break;
18421 prev = PREV_INSN (prev);
18424 return distance;
18427 /* Search backward for non-agu definition of register number REGNO1
18428 or register number REGNO2 in INSN's basic block until
18429 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18430 2. Reach a neighbouring BB's boundary, or
18431 3. Reach agu definition.
18432 Returns the distance between the non-agu definition point and INSN.
18433 If no definition point, returns -1. */
18435 static int
18436 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18437 rtx_insn *insn)
18439 basic_block bb = BLOCK_FOR_INSN (insn);
18440 int distance = 0;
18441 bool found = false;
18443 if (insn != BB_HEAD (bb))
18444 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18445 distance, PREV_INSN (insn),
18446 &found);
18448 if (!found && distance < LEA_SEARCH_THRESHOLD)
18450 edge e;
18451 edge_iterator ei;
18452 bool simple_loop = false;
18454 FOR_EACH_EDGE (e, ei, bb->preds)
18455 if (e->src == bb)
18457 simple_loop = true;
18458 break;
18461 if (simple_loop)
18462 distance = distance_non_agu_define_in_bb (regno1, regno2,
18463 insn, distance,
18464 BB_END (bb), &found);
18465 else
18467 int shortest_dist = -1;
18468 bool found_in_bb = false;
18470 FOR_EACH_EDGE (e, ei, bb->preds)
18472 int bb_dist
18473 = distance_non_agu_define_in_bb (regno1, regno2,
18474 insn, distance,
18475 BB_END (e->src),
18476 &found_in_bb);
18477 if (found_in_bb)
18479 if (shortest_dist < 0)
18480 shortest_dist = bb_dist;
18481 else if (bb_dist > 0)
18482 shortest_dist = MIN (bb_dist, shortest_dist);
18484 found = true;
18488 distance = shortest_dist;
18492 /* get_attr_type may modify recog data. We want to make sure
18493 that recog data is valid for instruction INSN, on which
18494 distance_non_agu_define is called. INSN is unchanged here. */
18495 extract_insn_cached (insn);
18497 if (!found)
18498 return -1;
18500 return distance >> 1;
18503 /* Return the distance in half-cycles between INSN and the next
18504 insn that uses register number REGNO in a memory address, added
18505 to DISTANCE. Return -1 if REGNO is set.
18507 Set *FOUND to true if a register use was found and
18508 to false otherwise.
18509 Set *REDEFINED to true if a register redefinition was
18510 found and to false otherwise. */
18512 static int
18513 distance_agu_use_in_bb (unsigned int regno,
18514 rtx_insn *insn, int distance, rtx_insn *start,
18515 bool *found, bool *redefined)
18517 basic_block bb = NULL;
18518 rtx_insn *next = start;
18519 rtx_insn *prev = NULL;
18521 *found = false;
18522 *redefined = false;
18524 if (start != NULL_RTX)
18526 bb = BLOCK_FOR_INSN (start);
18527 if (start != BB_HEAD (bb))
18528 /* If insn and start belong to the same bb, set prev to insn,
18529 so the call to increase_distance will increase the distance
18530 between insns by 1. */
18531 prev = insn;
18534 while (next
18535 && next != insn
18536 && distance < LEA_SEARCH_THRESHOLD)
18538 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18540 distance = increase_distance(prev, next, distance);
18541 if (insn_uses_reg_mem (regno, next))
18543 /* Return DISTANCE if OP0 is used in memory
18544 address in NEXT. */
18545 *found = true;
18546 return distance;
18549 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18551 /* Return -1 if OP0 is set in NEXT. */
18552 *redefined = true;
18553 return -1;
18556 prev = next;
18559 if (next == BB_END (bb))
18560 break;
18562 next = NEXT_INSN (next);
18565 return distance;
18568 /* Return the distance between INSN and the next insn that uses
18569 register number REGNO0 in a memory address. Return -1 if no
18570 such use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18572 static int
18573 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18575 basic_block bb = BLOCK_FOR_INSN (insn);
18576 int distance = 0;
18577 bool found = false;
18578 bool redefined = false;
18580 if (insn != BB_END (bb))
18581 distance = distance_agu_use_in_bb (regno0, insn, distance,
18582 NEXT_INSN (insn),
18583 &found, &redefined);
18585 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18587 edge e;
18588 edge_iterator ei;
18589 bool simple_loop = false;
18591 FOR_EACH_EDGE (e, ei, bb->succs)
18592 if (e->dest == bb)
18594 simple_loop = true;
18595 break;
18598 if (simple_loop)
18599 distance = distance_agu_use_in_bb (regno0, insn,
18600 distance, BB_HEAD (bb),
18601 &found, &redefined);
18602 else
18604 int shortest_dist = -1;
18605 bool found_in_bb = false;
18606 bool redefined_in_bb = false;
18608 FOR_EACH_EDGE (e, ei, bb->succs)
18610 int bb_dist
18611 = distance_agu_use_in_bb (regno0, insn,
18612 distance, BB_HEAD (e->dest),
18613 &found_in_bb, &redefined_in_bb);
18614 if (found_in_bb)
18616 if (shortest_dist < 0)
18617 shortest_dist = bb_dist;
18618 else if (bb_dist > 0)
18619 shortest_dist = MIN (bb_dist, shortest_dist);
18621 found = true;
18625 distance = shortest_dist;
18629 if (!found || redefined)
18630 return -1;
18632 return distance >> 1;
18635 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18636    there is a dilemma of choosing LEA or ADD.
18637    Negative value: ADD is preferred over LEA
18638    Zero: Neutral
18639    Positive value: LEA is preferred over ADD  */
18640 #define IX86_LEA_PRIORITY 0
18642 /* Return true if usage of lea INSN has performance advantage
18643 over a sequence of instructions. Instructions sequence has
18644 SPLIT_COST cycles higher latency than lea latency. */
18646 static bool
18647 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18648 unsigned int regno2, int split_cost, bool has_scale)
18650 int dist_define, dist_use;
18652   /* For Silvermont, if a 2-source or 3-source LEA is used for a
18653      non-destructive destination, or because the ability to use a
18654      SCALE is wanted, the use of LEA is justified.  */
18655 if (TARGET_SILVERMONT || TARGET_INTEL)
18657 if (has_scale)
18658 return true;
18659 if (split_cost < 1)
18660 return false;
18661 if (regno0 == regno1 || regno0 == regno2)
18662 return false;
18663 return true;
18666 dist_define = distance_non_agu_define (regno1, regno2, insn);
18667 dist_use = distance_agu_use (regno0, insn);
18669 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18671     /* If there is no non-AGU operand definition, no AGU
18672        operand usage and the split cost is 0, then both the lea
18673        and non-lea variants have the same priority.  Currently
18674        we prefer lea for 64-bit code and non-lea for 32-bit
18675        code.  */
18676 if (dist_use < 0 && split_cost == 0)
18677 return TARGET_64BIT || IX86_LEA_PRIORITY;
18678 else
18679 return true;
18682   /* With a longer distance to the definition, lea is more preferable.
18683      Here we adjust it to take the splitting cost and
18684      lea priority into account.  */
18685 dist_define += split_cost + IX86_LEA_PRIORITY;
18687   /* If there is no use in a memory address then we just check
18688      that the split cost exceeds the AGU stall.  */
18689 if (dist_use < 0)
18690 return dist_define > LEA_MAX_STALL;
18692   /* If this insn has both a backward non-AGU dependence and a forward
18693      AGU dependence, the one with the shorter distance takes effect.  */
18694 return dist_define >= dist_use;
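/* Worked example for ix86_lea_outperforms (illustrative only): with
   split_cost == 0 and IX86_LEA_PRIORITY == 0, a lea whose inputs were
   last written 2 cycles back (dist_define == 2) and whose result feeds a
   memory address in the very next cycle (dist_use == 1) is kept, since
   2 >= 1; if instead the inputs were written in the immediately preceding
   cycle (dist_define == 1) and the address use is 2 cycles away, the lea
   is split to avoid the AGU stall.  */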
18697 /* Return true if it is legal to clobber flags by INSN and
18698 false otherwise. */
18700 static bool
18701 ix86_ok_to_clobber_flags (rtx_insn *insn)
18703 basic_block bb = BLOCK_FOR_INSN (insn);
18704 df_ref use;
18705 bitmap live;
18707 while (insn)
18709 if (NONDEBUG_INSN_P (insn))
18711 FOR_EACH_INSN_USE (use, insn)
18712 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18713 return false;
18715 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18716 return true;
18719 if (insn == BB_END (bb))
18720 break;
18722 insn = NEXT_INSN (insn);
18725 live = df_get_live_out(bb);
18726 return !REGNO_REG_SET_P (live, FLAGS_REG);
18729 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18730 move and add to avoid AGU stalls. */
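/* For instance (an illustrative sketch): on TARGET_OPT_AGU cores the
   three-operand add implemented as
       lea    (%rdi,%rsi), %rax
   may be replaced by the ALU sequence
       mov    %rdi, %rax
       add    %rsi, %rax
   when ix86_lea_outperforms below decides the lea would stall the AGU.  */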
18732 bool
18733 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18735 unsigned int regno0, regno1, regno2;
18737 /* Check if we need to optimize. */
18738 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18739 return false;
18741 /* Check it is correct to split here. */
18742 if (!ix86_ok_to_clobber_flags(insn))
18743 return false;
18745 regno0 = true_regnum (operands[0]);
18746 regno1 = true_regnum (operands[1]);
18747 regno2 = true_regnum (operands[2]);
18749   /* We need to split only adds with a non-destructive
18750      destination operand.  */
18751 if (regno0 == regno1 || regno0 == regno2)
18752 return false;
18753 else
18754 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18757 /* Return true if we should emit lea instruction instead of mov
18758 instruction. */
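/* E.g. (an illustrative sketch): a plain register copy
       mov    %rsi, %rdi
   may instead be emitted as
       lea    (%rsi), %rdi
   when ix86_lea_outperforms below judges that computing the value on the
   address-generation unit avoids a stall at an upcoming address use.  */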
18760 bool
18761 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18763 unsigned int regno0, regno1;
18765 /* Check if we need to optimize. */
18766 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18767 return false;
18769 /* Use lea for reg to reg moves only. */
18770 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18771 return false;
18773 regno0 = true_regnum (operands[0]);
18774 regno1 = true_regnum (operands[1]);
18776 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18779 /* Return true if we need to split lea into a sequence of
18780 instructions to avoid AGU stalls. */
18782 bool
18783 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18785 unsigned int regno0, regno1, regno2;
18786 int split_cost;
18787 struct ix86_address parts;
18788 int ok;
18790 /* Check we need to optimize. */
18791 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18792 return false;
18794 /* The "at least two components" test below might not catch simple
18795 move or zero extension insns if parts.base is non-NULL and parts.disp
18796 is const0_rtx as the only components in the address, e.g. if the
18797 register is %rbp or %r13. As this test is much cheaper and moves or
18798 zero extensions are the common case, do this check first. */
18799 if (REG_P (operands[1])
18800 || (SImode_address_operand (operands[1], VOIDmode)
18801 && REG_P (XEXP (operands[1], 0))))
18802 return false;
18804 /* Check if it is OK to split here. */
18805 if (!ix86_ok_to_clobber_flags (insn))
18806 return false;
18808 ok = ix86_decompose_address (operands[1], &parts);
18809 gcc_assert (ok);
18811 /* There should be at least two components in the address. */
18812 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18813 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18814 return false;
18816   /* We should not split into an add if a non-legitimate PIC
18817      operand is used as the displacement.  */
18818 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18819 return false;
18821   regno0 = true_regnum (operands[0]);
18822 regno1 = INVALID_REGNUM;
18823 regno2 = INVALID_REGNUM;
18825 if (parts.base)
18826 regno1 = true_regnum (parts.base);
18827 if (parts.index)
18828 regno2 = true_regnum (parts.index);
18830 split_cost = 0;
18832   /* Compute how many cycles we will add to the execution time
18833      if we split the lea into a sequence of instructions.  */
18834 if (parts.base || parts.index)
18836       /* Have to use a mov instruction if the non-destructive
18837          destination form is used.  */
18838 if (regno1 != regno0 && regno2 != regno0)
18839 split_cost += 1;
18841 /* Have to add index to base if both exist. */
18842 if (parts.base && parts.index)
18843 split_cost += 1;
18845 /* Have to use shift and adds if scale is 2 or greater. */
18846 if (parts.scale > 1)
18848 if (regno0 != regno1)
18849 split_cost += 1;
18850 else if (regno2 == regno0)
18851 split_cost += 4;
18852 else
18853 split_cost += parts.scale;
18856       /* Have to use an add instruction with an immediate if
18857          disp is nonzero.  */
18858 if (parts.disp && parts.disp != const0_rtx)
18859 split_cost += 1;
18861 /* Subtract the price of lea. */
18862 split_cost -= 1;
18865 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18866 parts.scale > 1);
18869 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18870 matches destination. RTX includes clobber of FLAGS_REG. */
18872 static void
18873 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18874 rtx dst, rtx src)
18876 rtx op, clob;
18878 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18879 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18881 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
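/* The RTL emitted above has the shape (illustrative)
       (parallel [(set DST (CODE:MODE DST SRC))
                  (clobber (reg:CC FLAGS_REG))])
   which is the form expected by the flag-clobbering binary ALU patterns.  */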
18884 /* Return true if REGNO1's definition is nearer to INSN than REGNO2's,
         searching backward in INSN's basic block.  */
18886 static bool
18887 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18889 rtx_insn *prev = insn;
18890 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18892 if (insn == start)
18893 return false;
18894 while (prev && prev != start)
18896 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18898 prev = PREV_INSN (prev);
18899 continue;
18901 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18902 return true;
18903 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18904 return false;
18905 prev = PREV_INSN (prev);
18908 /* None of the regs is defined in the bb. */
18909 return false;
18912 /* Split lea instructions into a sequence of instructions
18913 which are executed on ALU to avoid AGU stalls.
18914 It is assumed that it is allowed to clobber flags register
18915 at lea position. */
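/* Illustrative example (a sketch of what the code below emits): with
   parts.base = %rbx, parts.index = %rcx, parts.scale = 4 and
   parts.disp = 8, a lea such as
       lea    0x8(%rbx,%rcx,4), %rax
   is split into
       mov    %rcx, %rax        # copy the index into the destination
       sal    $2, %rax          # shift to apply the scale
       add    %rbx, %rax        # add the base
       add    $0x8, %rax        # add the displacement
   with each step clobbering the flags register.  */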
18917 void
18918 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18920 unsigned int regno0, regno1, regno2;
18921 struct ix86_address parts;
18922 rtx target, tmp;
18923 int ok, adds;
18925 ok = ix86_decompose_address (operands[1], &parts);
18926 gcc_assert (ok);
18928 target = gen_lowpart (mode, operands[0]);
18930 regno0 = true_regnum (target);
18931 regno1 = INVALID_REGNUM;
18932 regno2 = INVALID_REGNUM;
18934 if (parts.base)
18936 parts.base = gen_lowpart (mode, parts.base);
18937 regno1 = true_regnum (parts.base);
18940 if (parts.index)
18942 parts.index = gen_lowpart (mode, parts.index);
18943 regno2 = true_regnum (parts.index);
18946 if (parts.disp)
18947 parts.disp = gen_lowpart (mode, parts.disp);
18949 if (parts.scale > 1)
18951 /* Case r1 = r1 + ... */
18952 if (regno1 == regno0)
18954       /* If we have the case r1 = r1 + C * r2 then we
18955          would have to use multiplication, which is very
18956          expensive.  Assume the cost model is wrong if we
18957          reach such a case here.  */
18958 gcc_assert (regno2 != regno0);
18960 for (adds = parts.scale; adds > 0; adds--)
18961 ix86_emit_binop (PLUS, mode, target, parts.index);
18963 else
18965 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18966 if (regno0 != regno2)
18967 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18969 /* Use shift for scaling. */
18970 ix86_emit_binop (ASHIFT, mode, target,
18971 GEN_INT (exact_log2 (parts.scale)));
18973 if (parts.base)
18974 ix86_emit_binop (PLUS, mode, target, parts.base);
18976 if (parts.disp && parts.disp != const0_rtx)
18977 ix86_emit_binop (PLUS, mode, target, parts.disp);
18980 else if (!parts.base && !parts.index)
18982 gcc_assert(parts.disp);
18983 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18985 else
18987 if (!parts.base)
18989 if (regno0 != regno2)
18990 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18992 else if (!parts.index)
18994 if (regno0 != regno1)
18995 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18997 else
18999 if (regno0 == regno1)
19000 tmp = parts.index;
19001 else if (regno0 == regno2)
19002 tmp = parts.base;
19003 else
19005 rtx tmp1;
19007 /* Find better operand for SET instruction, depending
19008 on which definition is farther from the insn. */
19009 if (find_nearest_reg_def (insn, regno1, regno2))
19010 tmp = parts.index, tmp1 = parts.base;
19011 else
19012 tmp = parts.base, tmp1 = parts.index;
19014 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19016 if (parts.disp && parts.disp != const0_rtx)
19017 ix86_emit_binop (PLUS, mode, target, parts.disp);
19019 ix86_emit_binop (PLUS, mode, target, tmp1);
19020 return;
19023 ix86_emit_binop (PLUS, mode, target, tmp);
19026 if (parts.disp && parts.disp != const0_rtx)
19027 ix86_emit_binop (PLUS, mode, target, parts.disp);
19031 /* Return true if it is ok to optimize an ADD operation to an LEA
19032    operation to avoid flag register consumption.  For most processors,
19033    ADD is faster than LEA.  For processors like BONNELL, if the
19034    destination register of the LEA holds an actual address which will be
19035    used soon, LEA is better, otherwise ADD is better.  */
19037 bool
19038 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
19040 unsigned int regno0 = true_regnum (operands[0]);
19041 unsigned int regno1 = true_regnum (operands[1]);
19042 unsigned int regno2 = true_regnum (operands[2]);
19044   /* If a = b + c, (a!=b && a!=c), we must use the lea form.  */
19045 if (regno0 != regno1 && regno0 != regno2)
19046 return true;
19048 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
19049 return false;
19051 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
19054 /* Return true if destination reg of SET_BODY is shift count of
19055 USE_BODY. */
19057 static bool
19058 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
19060 rtx set_dest;
19061 rtx shift_rtx;
19062 int i;
19064 /* Retrieve destination of SET_BODY. */
19065 switch (GET_CODE (set_body))
19067 case SET:
19068 set_dest = SET_DEST (set_body);
19069 if (!set_dest || !REG_P (set_dest))
19070 return false;
19071 break;
19072 case PARALLEL:
19073 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
19074 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
19075 use_body))
19076 return true;
19077 default:
19078 return false;
19079 break;
19082 /* Retrieve shift count of USE_BODY. */
19083 switch (GET_CODE (use_body))
19085 case SET:
19086 shift_rtx = XEXP (use_body, 1);
19087 break;
19088 case PARALLEL:
19089 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
19090 if (ix86_dep_by_shift_count_body (set_body,
19091 XVECEXP (use_body, 0, i)))
19092 return true;
19093 default:
19094 return false;
19095 break;
19098 if (shift_rtx
19099 && (GET_CODE (shift_rtx) == ASHIFT
19100 || GET_CODE (shift_rtx) == LSHIFTRT
19101 || GET_CODE (shift_rtx) == ASHIFTRT
19102 || GET_CODE (shift_rtx) == ROTATE
19103 || GET_CODE (shift_rtx) == ROTATERT))
19105 rtx shift_count = XEXP (shift_rtx, 1);
19107 /* Return true if shift count is dest of SET_BODY. */
19108 if (REG_P (shift_count))
19110 	  /* Add this check since this code can be invoked before register
19111 	     allocation by the pre-reload scheduler.  */
19112 if (reload_completed
19113 && true_regnum (set_dest) == true_regnum (shift_count))
19114 return true;
19115 else if (REGNO(set_dest) == REGNO(shift_count))
19116 return true;
19120 return false;
19123 /* Return true if destination reg of SET_INSN is shift count of
19124 USE_INSN. */
19126 bool
19127 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19129 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19130 PATTERN (use_insn));
19133 /* Return TRUE or FALSE depending on whether the unary operator meets the
19134 appropriate constraints. */
19136 bool
19137 ix86_unary_operator_ok (enum rtx_code,
19138 machine_mode,
19139 rtx operands[2])
19141 /* If one of operands is memory, source and destination must match. */
19142 if ((MEM_P (operands[0])
19143 || MEM_P (operands[1]))
19144 && ! rtx_equal_p (operands[0], operands[1]))
19145 return false;
19146 return true;
19149 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19150 are ok, keeping in mind the possible movddup alternative. */
19152 bool
19153 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19155 if (MEM_P (operands[0]))
19156 return rtx_equal_p (operands[0], operands[1 + high]);
19157 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19158 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19159 return true;
19162 /* Post-reload splitter for converting an SF or DFmode value in an
19163 SSE register into an unsigned SImode. */
19165 void
19166 ix86_split_convert_uns_si_sse (rtx operands[])
19168 machine_mode vecmode;
19169 rtx value, large, zero_or_two31, input, two31, x;
19171 large = operands[1];
19172 zero_or_two31 = operands[2];
19173 input = operands[3];
19174 two31 = operands[4];
19175 vecmode = GET_MODE (large);
19176 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19178 /* Load up the value into the low element. We must ensure that the other
19179 elements are valid floats -- zero is the easiest such value. */
19180 if (MEM_P (input))
19182 if (vecmode == V4SFmode)
19183 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19184 else
19185 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19187 else
19189 input = gen_rtx_REG (vecmode, REGNO (input));
19190 emit_move_insn (value, CONST0_RTX (vecmode));
19191 if (vecmode == V4SFmode)
19192 emit_insn (gen_sse_movss (value, value, input));
19193 else
19194 emit_insn (gen_sse2_movsd (value, value, input));
19197 emit_move_insn (large, two31);
19198 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19200 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19201 emit_insn (gen_rtx_SET (VOIDmode, large, x));
19203 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19204 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
19206 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19207 emit_insn (gen_rtx_SET (VOIDmode, value, x));
19209 large = gen_rtx_REG (V4SImode, REGNO (large));
19210 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19212 x = gen_rtx_REG (V4SImode, REGNO (value));
19213 if (vecmode == V4SFmode)
19214 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19215 else
19216 emit_insn (gen_sse2_cvttpd2dq (x, value));
19217 value = x;
19219 emit_insn (gen_xorv4si3 (value, value, large));
19222 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19223 Expects the 64-bit DImode to be supplied in a pair of integral
19224 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19225 -mfpmath=sse, !optimize_size only. */
19227 void
19228 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19230 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19231 rtx int_xmm, fp_xmm;
19232 rtx biases, exponents;
19233 rtx x;
19235 int_xmm = gen_reg_rtx (V4SImode);
19236 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19237 emit_insn (gen_movdi_to_sse (int_xmm, input));
19238 else if (TARGET_SSE_SPLIT_REGS)
19240 emit_clobber (int_xmm);
19241 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19243 else
19245 x = gen_reg_rtx (V2DImode);
19246 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19247 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19250 x = gen_rtx_CONST_VECTOR (V4SImode,
19251 gen_rtvec (4, GEN_INT (0x43300000UL),
19252 GEN_INT (0x45300000UL),
19253 const0_rtx, const0_rtx));
19254 exponents = validize_mem (force_const_mem (V4SImode, x));
19256 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19257 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19259   /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
19260      yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19261      Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19262      (0x1.0p84 + double(fp_value_hi_xmm) * 0x1.0p32).
19263      Note these exponents differ by 32.  */
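/* Worked example (illustrative): for the input 0x0000000100000005
   (hi = 1, lo = 5) the two packed doubles are
       2^52 + 5        and        2^84 + 1 * 2^32;
   subtracting the 2^52 and 2^84 biases below leaves 5.0 and 2^32, and the
   final horizontal add produces 4294967301.0 == 2^32 + 5.  */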
19265 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19267 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19268 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19269 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19270 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19271 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19272 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19273 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19274 biases = validize_mem (force_const_mem (V2DFmode, biases));
19275 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19277 /* Add the upper and lower DFmode values together. */
19278 if (TARGET_SSE3)
19279 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19280 else
19282 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19283 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19284 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19287 ix86_expand_vector_extract (false, target, fp_xmm, 0);
19290 /* Not used, but eases macroization of patterns. */
19291 void
19292 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19294 gcc_unreachable ();
19297 /* Convert an unsigned SImode value into a DFmode. Only currently used
19298 for SSE, but applicable anywhere. */
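/* Sketch of the idea: flip the sign bit with a wrapping add of -2^31, do
   an ordinary signed SImode -> DFmode conversion, then add 2^31.0 back.
   E.g. for input 0xFFFFFFFF: 0xFFFFFFFF + 0x80000000 wraps to 0x7FFFFFFF,
   which converts to 2147483647.0, and adding 2147483648.0 yields
   4294967295.0.  */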
19300 void
19301 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19303 REAL_VALUE_TYPE TWO31r;
19304 rtx x, fp;
19306 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19307 NULL, 1, OPTAB_DIRECT);
19309 fp = gen_reg_rtx (DFmode);
19310 emit_insn (gen_floatsidf2 (fp, x));
19312 real_ldexp (&TWO31r, &dconst1, 31);
19313 x = const_double_from_real_value (TWO31r, DFmode);
19315 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19316 if (x != target)
19317 emit_move_insn (target, x);
19320 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19321 32-bit mode; otherwise we have a direct convert instruction. */
19323 void
19324 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19326 REAL_VALUE_TYPE TWO32r;
19327 rtx fp_lo, fp_hi, x;
19329 fp_lo = gen_reg_rtx (DFmode);
19330 fp_hi = gen_reg_rtx (DFmode);
19332 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19334 real_ldexp (&TWO32r, &dconst1, 32);
19335 x = const_double_from_real_value (TWO32r, DFmode);
19336 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19338 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19340 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19341 0, OPTAB_DIRECT);
19342 if (x != target)
19343 emit_move_insn (target, x);
19346 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19347 For x86_32, -mfpmath=sse, !optimize_size only. */
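/* Sketch of the method: split the 32-bit value into 16-bit halves, each of
   which converts exactly to SFmode, then recombine as
       result = (float) (input >> 16) * 0x1p16 + (float) (input & 0xffff).
   E.g. 0x12345678 becomes 0x1234 * 65536.0 + 0x5678.  */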
19348 void
19349 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19351 REAL_VALUE_TYPE ONE16r;
19352 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19354 real_ldexp (&ONE16r, &dconst1, 16);
19355 x = const_double_from_real_value (ONE16r, SFmode);
19356 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
19357 NULL, 0, OPTAB_DIRECT);
19358 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
19359 NULL, 0, OPTAB_DIRECT);
19360 fp_hi = gen_reg_rtx (SFmode);
19361 fp_lo = gen_reg_rtx (SFmode);
19362 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19363 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19364 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19365 0, OPTAB_DIRECT);
19366 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19367 0, OPTAB_DIRECT);
19368 if (!rtx_equal_p (target, fp_hi))
19369 emit_move_insn (target, fp_hi);
19372 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19373 a vector of unsigned ints VAL to vector of floats TARGET. */
19375 void
19376 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19378 rtx tmp[8];
19379 REAL_VALUE_TYPE TWO16r;
19380 machine_mode intmode = GET_MODE (val);
19381 machine_mode fltmode = GET_MODE (target);
19382 rtx (*cvt) (rtx, rtx);
19384 if (intmode == V4SImode)
19385 cvt = gen_floatv4siv4sf2;
19386 else
19387 cvt = gen_floatv8siv8sf2;
19388 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19389 tmp[0] = force_reg (intmode, tmp[0]);
19390 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19391 OPTAB_DIRECT);
19392 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19393 NULL_RTX, 1, OPTAB_DIRECT);
19394 tmp[3] = gen_reg_rtx (fltmode);
19395 emit_insn (cvt (tmp[3], tmp[1]));
19396 tmp[4] = gen_reg_rtx (fltmode);
19397 emit_insn (cvt (tmp[4], tmp[2]));
19398 real_ldexp (&TWO16r, &dconst1, 16);
19399 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19400 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19401 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19402 OPTAB_DIRECT);
19403 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19404 OPTAB_DIRECT);
19405 if (tmp[7] != target)
19406 emit_move_insn (target, tmp[7]);
19409 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19410 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19411    This is done by using just a signed conversion if < 0x1p31, and otherwise by
19412    subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards.  */
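/* Worked example (illustrative): for a lane holding 3000000000.0 the
   compare mask is all-ones, so 2^31 is subtracted giving 852516352.0, the
   signed fix_trunc produces 0x32D05E00, and xoring in 0x80000000 from
   *XORP restores 0xB2D05E00 == 3000000000.  Lanes below 2^31 are left
   unchanged and get a zero xor mask.  */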
19415 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19417 REAL_VALUE_TYPE TWO31r;
19418 rtx two31r, tmp[4];
19419 machine_mode mode = GET_MODE (val);
19420 machine_mode scalarmode = GET_MODE_INNER (mode);
19421 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19422 rtx (*cmp) (rtx, rtx, rtx, rtx);
19423 int i;
19425 for (i = 0; i < 3; i++)
19426 tmp[i] = gen_reg_rtx (mode);
19427 real_ldexp (&TWO31r, &dconst1, 31);
19428 two31r = const_double_from_real_value (TWO31r, scalarmode);
19429 two31r = ix86_build_const_vector (mode, 1, two31r);
19430 two31r = force_reg (mode, two31r);
19431 switch (mode)
19433 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19434 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19435 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19436 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19437 default: gcc_unreachable ();
19439 tmp[3] = gen_rtx_LE (mode, two31r, val);
19440 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19441 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19442 0, OPTAB_DIRECT);
19443 if (intmode == V4SImode || TARGET_AVX2)
19444 *xorp = expand_simple_binop (intmode, ASHIFT,
19445 gen_lowpart (intmode, tmp[0]),
19446 GEN_INT (31), NULL_RTX, 0,
19447 OPTAB_DIRECT);
19448 else
19450 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
19451 two31 = ix86_build_const_vector (intmode, 1, two31);
19452 *xorp = expand_simple_binop (intmode, AND,
19453 gen_lowpart (intmode, tmp[0]),
19454 two31, NULL_RTX, 0,
19455 OPTAB_DIRECT);
19457 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19458 0, OPTAB_DIRECT);
19461 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19462 then replicate the value for all elements of the vector
19463 register. */
19466 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19468 int i, n_elt;
19469 rtvec v;
19470 machine_mode scalar_mode;
19472 switch (mode)
19474 case V64QImode:
19475 case V32QImode:
19476 case V16QImode:
19477 case V32HImode:
19478 case V16HImode:
19479 case V8HImode:
19480 case V16SImode:
19481 case V8SImode:
19482 case V4SImode:
19483 case V8DImode:
19484 case V4DImode:
19485 case V2DImode:
19486 gcc_assert (vect);
19487 case V16SFmode:
19488 case V8SFmode:
19489 case V4SFmode:
19490 case V8DFmode:
19491 case V4DFmode:
19492 case V2DFmode:
19493 n_elt = GET_MODE_NUNITS (mode);
19494 v = rtvec_alloc (n_elt);
19495 scalar_mode = GET_MODE_INNER (mode);
19497 RTVEC_ELT (v, 0) = value;
19499 for (i = 1; i < n_elt; ++i)
19500 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19502 return gen_rtx_CONST_VECTOR (mode, v);
19504 default:
19505 gcc_unreachable ();
19509 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19510 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19511 for an SSE register. If VECT is true, then replicate the mask for
19512 all elements of the vector register. If INVERT is true, then create
19513 a mask excluding the sign bit. */
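/* For reference (illustrative): with MODE == SFmode the per-element mask
   built below is 0x80000000 (or 0x7fffffff when INVERT is true), and with
   MODE == DFmode it is 0x8000000000000000 (resp. 0x7fffffffffffffff).  */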
19516 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19518 machine_mode vec_mode, imode;
19519 HOST_WIDE_INT hi, lo;
19520 int shift = 63;
19521 rtx v;
19522 rtx mask;
19524 /* Find the sign bit, sign extended to 2*HWI. */
19525 switch (mode)
19527 case V16SImode:
19528 case V16SFmode:
19529 case V8SImode:
19530 case V4SImode:
19531 case V8SFmode:
19532 case V4SFmode:
19533 vec_mode = mode;
19534 mode = GET_MODE_INNER (mode);
19535 imode = SImode;
19536 lo = 0x80000000, hi = lo < 0;
19537 break;
19539 case V8DImode:
19540 case V4DImode:
19541 case V2DImode:
19542 case V8DFmode:
19543 case V4DFmode:
19544 case V2DFmode:
19545 vec_mode = mode;
19546 mode = GET_MODE_INNER (mode);
19547 imode = DImode;
19548 if (HOST_BITS_PER_WIDE_INT >= 64)
19549 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19550 else
19551 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19552 break;
19554 case TImode:
19555 case TFmode:
19556 vec_mode = VOIDmode;
19557 if (HOST_BITS_PER_WIDE_INT >= 64)
19559 imode = TImode;
19560 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19562 else
19564 rtvec vec;
19566 imode = DImode;
19567 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19569 if (invert)
19571 lo = ~lo, hi = ~hi;
19572 v = constm1_rtx;
19574 else
19575 v = const0_rtx;
19577 mask = immed_double_const (lo, hi, imode);
19579 vec = gen_rtvec (2, v, mask);
19580 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19581 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19583 return v;
19585 break;
19587 default:
19588 gcc_unreachable ();
19591 if (invert)
19592 lo = ~lo, hi = ~hi;
19594 /* Force this value into the low part of a fp vector constant. */
19595 mask = immed_double_const (lo, hi, imode);
19596 mask = gen_lowpart (mode, mask);
19598 if (vec_mode == VOIDmode)
19599 return force_reg (mode, mask);
19601 v = ix86_build_const_vector (vec_mode, vect, mask);
19602 return force_reg (vec_mode, v);
19605 /* Generate code for floating point ABS or NEG. */
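/* Roughly (a sketch): when SSE is used, NEG is performed by xoring the
   value with the sign-bit mask and ABS by anding it with the inverted mask
   built by ix86_build_signbit_mask; here the mask is only attached via a
   USE, and the actual and/xor is produced when the emitted pattern is
   later matched and split.  */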
19607 void
19608 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19609 rtx operands[])
19611 rtx mask, set, dst, src;
19612 bool use_sse = false;
19613 bool vector_mode = VECTOR_MODE_P (mode);
19614 machine_mode vmode = mode;
19616 if (vector_mode)
19617 use_sse = true;
19618 else if (mode == TFmode)
19619 use_sse = true;
19620 else if (TARGET_SSE_MATH)
19622 use_sse = SSE_FLOAT_MODE_P (mode);
19623 if (mode == SFmode)
19624 vmode = V4SFmode;
19625 else if (mode == DFmode)
19626 vmode = V2DFmode;
19629 /* NEG and ABS performed with SSE use bitwise mask operations.
19630 Create the appropriate mask now. */
19631 if (use_sse)
19632 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19633 else
19634 mask = NULL_RTX;
19636 dst = operands[0];
19637 src = operands[1];
19639 set = gen_rtx_fmt_e (code, mode, src);
19640 set = gen_rtx_SET (VOIDmode, dst, set);
19642 if (mask)
19644 rtx use, clob;
19645 rtvec par;
19647 use = gen_rtx_USE (VOIDmode, mask);
19648 if (vector_mode)
19649 par = gen_rtvec (2, set, use);
19650 else
19652 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19653 par = gen_rtvec (3, set, use, clob);
19655 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19657 else
19658 emit_insn (set);
19661 /* Expand a copysign operation. Special case operand 0 being a constant. */
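/* The underlying identity (illustrative): copysign (x, y) computes
       (x & ~SIGNMASK) | (y & SIGNMASK)
   where SIGNMASK holds only the sign bit of each element; the nmask and
   mask operands used by the copysign patterns below correspond to
   ~SIGNMASK and SIGNMASK respectively.  */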
19663 void
19664 ix86_expand_copysign (rtx operands[])
19666 machine_mode mode, vmode;
19667 rtx dest, op0, op1, mask, nmask;
19669 dest = operands[0];
19670 op0 = operands[1];
19671 op1 = operands[2];
19673 mode = GET_MODE (dest);
19675 if (mode == SFmode)
19676 vmode = V4SFmode;
19677 else if (mode == DFmode)
19678 vmode = V2DFmode;
19679 else
19680 vmode = mode;
19682 if (GET_CODE (op0) == CONST_DOUBLE)
19684 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19686 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19687 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19689 if (mode == SFmode || mode == DFmode)
19691 if (op0 == CONST0_RTX (mode))
19692 op0 = CONST0_RTX (vmode);
19693 else
19695 rtx v = ix86_build_const_vector (vmode, false, op0);
19697 op0 = force_reg (vmode, v);
19700 else if (op0 != CONST0_RTX (mode))
19701 op0 = force_reg (mode, op0);
19703 mask = ix86_build_signbit_mask (vmode, 0, 0);
19705 if (mode == SFmode)
19706 copysign_insn = gen_copysignsf3_const;
19707 else if (mode == DFmode)
19708 copysign_insn = gen_copysigndf3_const;
19709 else
19710 copysign_insn = gen_copysigntf3_const;
19712 emit_insn (copysign_insn (dest, op0, op1, mask));
19714 else
19716 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19718 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19719 mask = ix86_build_signbit_mask (vmode, 0, 0);
19721 if (mode == SFmode)
19722 copysign_insn = gen_copysignsf3_var;
19723 else if (mode == DFmode)
19724 copysign_insn = gen_copysigndf3_var;
19725 else
19726 copysign_insn = gen_copysigntf3_var;
19728 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19732 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19733 be a constant, and so has already been expanded into a vector constant. */
19735 void
19736 ix86_split_copysign_const (rtx operands[])
19738 machine_mode mode, vmode;
19739 rtx dest, op0, mask, x;
19741 dest = operands[0];
19742 op0 = operands[1];
19743 mask = operands[3];
19745 mode = GET_MODE (dest);
19746 vmode = GET_MODE (mask);
19748 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19749 x = gen_rtx_AND (vmode, dest, mask);
19750 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19752 if (op0 != CONST0_RTX (vmode))
19754 x = gen_rtx_IOR (vmode, dest, op0);
19755 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19759 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19760 so we have to do two masks. */
19762 void
19763 ix86_split_copysign_var (rtx operands[])
19765 machine_mode mode, vmode;
19766 rtx dest, scratch, op0, op1, mask, nmask, x;
19768 dest = operands[0];
19769 scratch = operands[1];
19770 op0 = operands[2];
19771 op1 = operands[3];
19772 nmask = operands[4];
19773 mask = operands[5];
19775 mode = GET_MODE (dest);
19776 vmode = GET_MODE (mask);
19778 if (rtx_equal_p (op0, op1))
19780 /* Shouldn't happen often (it's useless, obviously), but when it does
19781 we'd generate incorrect code if we continue below. */
19782 emit_move_insn (dest, op0);
19783 return;
19786 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19788 gcc_assert (REGNO (op1) == REGNO (scratch));
19790 x = gen_rtx_AND (vmode, scratch, mask);
19791 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19793 dest = mask;
19794 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19795 x = gen_rtx_NOT (vmode, dest);
19796 x = gen_rtx_AND (vmode, x, op0);
19797 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19799 else
19801 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19803 x = gen_rtx_AND (vmode, scratch, mask);
19805 else /* alternative 2,4 */
19807 gcc_assert (REGNO (mask) == REGNO (scratch));
19808 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19809 x = gen_rtx_AND (vmode, scratch, op1);
19811 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19813 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19815 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19816 x = gen_rtx_AND (vmode, dest, nmask);
19818 else /* alternative 3,4 */
19820 gcc_assert (REGNO (nmask) == REGNO (dest));
19821 dest = nmask;
19822 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19823 x = gen_rtx_AND (vmode, dest, op0);
19825 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19828 x = gen_rtx_IOR (vmode, dest, scratch);
19829 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19832 /* Return TRUE or FALSE depending on whether the first SET in INSN
19833    has source and destination with matching CC modes and whether the
19834    CC mode is at least as constrained as REQ_MODE.  */
19836 bool
19837 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19839 rtx set;
19840 machine_mode set_mode;
19842 set = PATTERN (insn);
19843 if (GET_CODE (set) == PARALLEL)
19844 set = XVECEXP (set, 0, 0);
19845 gcc_assert (GET_CODE (set) == SET);
19846 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19848 set_mode = GET_MODE (SET_DEST (set));
19849 switch (set_mode)
19851 case CCNOmode:
19852 if (req_mode != CCNOmode
19853 && (req_mode != CCmode
19854 || XEXP (SET_SRC (set), 1) != const0_rtx))
19855 return false;
19856 break;
19857 case CCmode:
19858 if (req_mode == CCGCmode)
19859 return false;
19860 /* FALLTHRU */
19861 case CCGCmode:
19862 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19863 return false;
19864 /* FALLTHRU */
19865 case CCGOCmode:
19866 if (req_mode == CCZmode)
19867 return false;
19868 /* FALLTHRU */
19869 case CCZmode:
19870 break;
19872 case CCAmode:
19873 case CCCmode:
19874 case CCOmode:
19875 case CCSmode:
19876 if (set_mode != req_mode)
19877 return false;
19878 break;
19880 default:
19881 gcc_unreachable ();
19884 return GET_MODE (SET_SRC (set)) == set_mode;
19887 /* Generate insn patterns to do an integer compare of OPERANDS. */
19889 static rtx
19890 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19892 machine_mode cmpmode;
19893 rtx tmp, flags;
19895 cmpmode = SELECT_CC_MODE (code, op0, op1);
19896 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19898 /* This is very simple, but making the interface the same as in the
19899 FP case makes the rest of the code easier. */
19900 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19901 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19903 /* Return the test that should be put into the flags user, i.e.
19904 the bcc, scc, or cmov instruction. */
19905 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19908 /* Figure out whether to use ordered or unordered fp comparisons.
19909 Return the appropriate mode to use. */
19911 machine_mode
19912 ix86_fp_compare_mode (enum rtx_code)
19914   /* ??? In order to make all comparisons reversible, we do all comparisons
19915      non-trapping when compiling for IEEE.  Once gcc is able to distinguish
19916      all forms of trapping and nontrapping comparisons, we can make inequality
19917      comparisons trapping again, since that results in better code when using
19918      FCOM based compares.  */
19919 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19922 machine_mode
19923 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19925 machine_mode mode = GET_MODE (op0);
19927 if (SCALAR_FLOAT_MODE_P (mode))
19929 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19930 return ix86_fp_compare_mode (code);
19933 switch (code)
19935 /* Only zero flag is needed. */
19936 case EQ: /* ZF=0 */
19937 case NE: /* ZF!=0 */
19938 return CCZmode;
19939 /* Codes needing carry flag. */
19940 case GEU: /* CF=0 */
19941 case LTU: /* CF=1 */
19942 /* Detect overflow checks. They need just the carry flag. */
19943 if (GET_CODE (op0) == PLUS
19944 && rtx_equal_p (op1, XEXP (op0, 0)))
19945 return CCCmode;
19946 else
19947 return CCmode;
19948 case GTU: /* CF=0 & ZF=0 */
19949 case LEU: /* CF=1 | ZF=1 */
19950 return CCmode;
19951 /* Codes possibly doable only with sign flag when
19952 comparing against zero. */
19953 case GE: /* SF=OF or SF=0 */
19954 case LT: /* SF<>OF or SF=1 */
19955 if (op1 == const0_rtx)
19956 return CCGOCmode;
19957 else
19958 /* For other cases Carry flag is not required. */
19959 return CCGCmode;
19960       /* Codes doable only with the sign flag when comparing
19961          against zero, but for which we lack a jump instruction,
19962          so we need to use relational tests against overflow,
19963          which thus needs to be zero.  */
19964 case GT: /* ZF=0 & SF=OF */
19965 case LE: /* ZF=1 | SF<>OF */
19966 if (op1 == const0_rtx)
19967 return CCNOmode;
19968 else
19969 return CCGCmode;
19970       /* The strcmp pattern does (use flags) and combine may ask us for the
19971          proper mode.  */
19972 case USE:
19973 return CCmode;
19974 default:
19975 gcc_unreachable ();
19979 /* Return the fixed registers used for condition codes. */
19981 static bool
19982 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19984 *p1 = FLAGS_REG;
19985 *p2 = FPSR_REG;
19986 return true;
19989 /* If two condition code modes are compatible, return a condition code
19990 mode which is compatible with both. Otherwise, return
19991 VOIDmode. */
19993 static machine_mode
19994 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
19996 if (m1 == m2)
19997 return m1;
19999 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
20000 return VOIDmode;
20002 if ((m1 == CCGCmode && m2 == CCGOCmode)
20003 || (m1 == CCGOCmode && m2 == CCGCmode))
20004 return CCGCmode;
20006 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
20007 return m2;
20008 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
20009 return m1;
20011 switch (m1)
20013 default:
20014 gcc_unreachable ();
20016 case CCmode:
20017 case CCGCmode:
20018 case CCGOCmode:
20019 case CCNOmode:
20020 case CCAmode:
20021 case CCCmode:
20022 case CCOmode:
20023 case CCSmode:
20024 case CCZmode:
20025 switch (m2)
20027 default:
20028 return VOIDmode;
20030 case CCmode:
20031 case CCGCmode:
20032 case CCGOCmode:
20033 case CCNOmode:
20034 case CCAmode:
20035 case CCCmode:
20036 case CCOmode:
20037 case CCSmode:
20038 case CCZmode:
20039 return CCmode;
20042 case CCFPmode:
20043 case CCFPUmode:
20044 /* These are only compatible with themselves, which we already
20045 checked above. */
20046 return VOIDmode;
20051 /* Return a comparison we can do that is equivalent to
20052    swap_condition (code), apart possibly from orderedness.
20053    But never change orderedness if TARGET_IEEE_FP, returning
20054    UNKNOWN in that case if necessary.  */
20056 static enum rtx_code
20057 ix86_fp_swap_condition (enum rtx_code code)
20059 switch (code)
20061 case GT: /* GTU - CF=0 & ZF=0 */
20062 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
20063 case GE: /* GEU - CF=0 */
20064 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
20065 case UNLT: /* LTU - CF=1 */
20066 return TARGET_IEEE_FP ? UNKNOWN : GT;
20067 case UNLE: /* LEU - CF=1 | ZF=1 */
20068 return TARGET_IEEE_FP ? UNKNOWN : GE;
20069 default:
20070 return swap_condition (code);
20074 /* Return the cost of comparison CODE using the best strategy for performance.
20075    All the following functions use the number of instructions as a cost metric.
20076    In the future this should be tweaked to compute bytes for optimize_size and
20077    to take into account the performance of various instructions on various CPUs.  */
20079 static int
20080 ix86_fp_comparison_cost (enum rtx_code code)
20082 int arith_cost;
20084 /* The cost of code using bit-twiddling on %ah. */
20085 switch (code)
20087 case UNLE:
20088 case UNLT:
20089 case LTGT:
20090 case GT:
20091 case GE:
20092 case UNORDERED:
20093 case ORDERED:
20094 case UNEQ:
20095 arith_cost = 4;
20096 break;
20097 case LT:
20098 case NE:
20099 case EQ:
20100 case UNGE:
20101 arith_cost = TARGET_IEEE_FP ? 5 : 4;
20102 break;
20103 case LE:
20104 case UNGT:
20105 arith_cost = TARGET_IEEE_FP ? 6 : 4;
20106 break;
20107 default:
20108 gcc_unreachable ();
20111 switch (ix86_fp_comparison_strategy (code))
20113 case IX86_FPCMP_COMI:
20114 return arith_cost > 4 ? 3 : 2;
20115 case IX86_FPCMP_SAHF:
20116 return arith_cost > 4 ? 4 : 3;
20117 default:
20118 return arith_cost;
20122 /* Return the strategy to use for floating-point comparisons.  We assume that
20123    fcomi is always preferable where available, since that is also true when
20124    looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
20126 enum ix86_fpcmp_strategy
20127 ix86_fp_comparison_strategy (enum rtx_code)
20129 /* Do fcomi/sahf based test when profitable. */
20131 if (TARGET_CMOVE)
20132 return IX86_FPCMP_COMI;
20134 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20135 return IX86_FPCMP_SAHF;
20137 return IX86_FPCMP_ARITH;
20140 /* Swap, force into registers, or otherwise massage the two operands
20141 to a fp comparison. The operands are updated in place; the new
20142 comparison code is returned. */
20144 static enum rtx_code
20145 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20147 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20148 rtx op0 = *pop0, op1 = *pop1;
20149 machine_mode op_mode = GET_MODE (op0);
20150 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20152 /* All of the unordered compare instructions only work on registers.
20153 The same is true of the fcomi compare instructions. The XFmode
20154 compare instructions require registers except when comparing
20155 against zero or when converting operand 1 from fixed point to
20156 floating point. */
20158 if (!is_sse
20159 && (fpcmp_mode == CCFPUmode
20160 || (op_mode == XFmode
20161 && ! (standard_80387_constant_p (op0) == 1
20162 || standard_80387_constant_p (op1) == 1)
20163 && GET_CODE (op1) != FLOAT)
20164 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20166 op0 = force_reg (op_mode, op0);
20167 op1 = force_reg (op_mode, op1);
20169 else
20171 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20172 things around if they appear profitable, otherwise force op0
20173 into a register. */
20175 if (standard_80387_constant_p (op0) == 0
20176 || (MEM_P (op0)
20177 && ! (standard_80387_constant_p (op1) == 0
20178 || MEM_P (op1))))
20180 enum rtx_code new_code = ix86_fp_swap_condition (code);
20181 if (new_code != UNKNOWN)
20183 std::swap (op0, op1);
20184 code = new_code;
20188 if (!REG_P (op0))
20189 op0 = force_reg (op_mode, op0);
20191 if (CONSTANT_P (op1))
20193 int tmp = standard_80387_constant_p (op1);
20194 if (tmp == 0)
20195 op1 = validize_mem (force_const_mem (op_mode, op1));
20196 else if (tmp == 1)
20198 if (TARGET_CMOVE)
20199 op1 = force_reg (op_mode, op1);
20201 else
20202 op1 = force_reg (op_mode, op1);
20206 /* Try to rearrange the comparison to make it cheaper. */
20207 if (ix86_fp_comparison_cost (code)
20208 > ix86_fp_comparison_cost (swap_condition (code))
20209 && (REG_P (op1) || can_create_pseudo_p ()))
20211 std::swap (op0, op1);
20212 code = swap_condition (code);
20213 if (!REG_P (op0))
20214 op0 = force_reg (op_mode, op0);
20217 *pop0 = op0;
20218 *pop1 = op1;
20219 return code;
20222 /* Convert the comparison codes we use to represent an FP comparison to the
20223    integer code that will result in a proper branch.  Return UNKNOWN if no
20224    such code is available.  */
20226 enum rtx_code
20227 ix86_fp_compare_code_to_integer (enum rtx_code code)
20229 switch (code)
20231 case GT:
20232 return GTU;
20233 case GE:
20234 return GEU;
20235 case ORDERED:
20236 case UNORDERED:
20237 return code;
20238 break;
20239 case UNEQ:
20240 return EQ;
20241 break;
20242 case UNLT:
20243 return LTU;
20244 break;
20245 case UNLE:
20246 return LEU;
20247 break;
20248 case LTGT:
20249 return NE;
20250 break;
20251 default:
20252 return UNKNOWN;
20256 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20258 static rtx
20259 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20261 machine_mode fpcmp_mode, intcmp_mode;
20262 rtx tmp, tmp2;
20264 fpcmp_mode = ix86_fp_compare_mode (code);
20265 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20267 /* Do fcomi/sahf based test when profitable. */
20268 switch (ix86_fp_comparison_strategy (code))
20270 case IX86_FPCMP_COMI:
20271 intcmp_mode = fpcmp_mode;
20272 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20273 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20274 tmp);
20275 emit_insn (tmp);
20276 break;
20278 case IX86_FPCMP_SAHF:
20279 intcmp_mode = fpcmp_mode;
20280 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20281 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20282 tmp);
20284 if (!scratch)
20285 scratch = gen_reg_rtx (HImode);
20286 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20287 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20288 break;
20290 case IX86_FPCMP_ARITH:
20291 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20292 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20293 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20294 if (!scratch)
20295 scratch = gen_reg_rtx (HImode);
20296 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
20298 /* In the unordered case, we have to check C2 for NaN's, which
20299 doesn't happen to work out to anything nice combination-wise.
20300 So do some bit twiddling on the value we've got in AH to come
20301 up with an appropriate set of condition codes. */
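/* For reference (illustrative): after fnstsw the x87 condition bits land
   in the high byte of SCRATCH as C0 = 0x01, C2 = 0x04 and C3 = 0x40, so
   the magic constants below test combinations of those bits, e.g.
   0x45 == C3|C2|C0 and 0x40 == C3.  */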
20303 intcmp_mode = CCNOmode;
20304 switch (code)
20306 case GT:
20307 case UNGT:
20308 if (code == GT || !TARGET_IEEE_FP)
20310 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20311 code = EQ;
20313 else
20315 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20316 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20317 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20318 intcmp_mode = CCmode;
20319 code = GEU;
20321 break;
20322 case LT:
20323 case UNLT:
20324 if (code == LT && TARGET_IEEE_FP)
20326 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20327 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20328 intcmp_mode = CCmode;
20329 code = EQ;
20331 else
20333 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20334 code = NE;
20336 break;
20337 case GE:
20338 case UNGE:
20339 if (code == GE || !TARGET_IEEE_FP)
20341 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20342 code = EQ;
20344 else
20346 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20347 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20348 code = NE;
20350 break;
20351 case LE:
20352 case UNLE:
20353 if (code == LE && TARGET_IEEE_FP)
20355 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20356 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20357 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20358 intcmp_mode = CCmode;
20359 code = LTU;
20361 else
20363 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20364 code = NE;
20366 break;
20367 case EQ:
20368 case UNEQ:
20369 if (code == EQ && TARGET_IEEE_FP)
20371 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20372 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20373 intcmp_mode = CCmode;
20374 code = EQ;
20376 else
20378 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20379 code = NE;
20381 break;
20382 case NE:
20383 case LTGT:
20384 if (code == NE && TARGET_IEEE_FP)
20386 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20387 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20388 GEN_INT (0x40)));
20389 code = NE;
20391 else
20393 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20394 code = EQ;
20396 break;
20398 case UNORDERED:
20399 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20400 code = NE;
20401 break;
20402 case ORDERED:
20403 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20404 code = EQ;
20405 break;
20407 default:
20408 gcc_unreachable ();
20410 break;
20412 default:
20413 gcc_unreachable();
20416 /* Return the test that should be put into the flags user, i.e.
20417 the bcc, scc, or cmov instruction. */
20418 return gen_rtx_fmt_ee (code, VOIDmode,
20419 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20420 const0_rtx);
20423 static rtx
20424 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20426 rtx ret;
20428 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20429 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20431 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20433 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20434 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20436 else
20437 ret = ix86_expand_int_compare (code, op0, op1);
20439 return ret;
20442 void
20443 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20445 machine_mode mode = GET_MODE (op0);
20446 rtx tmp;
20448 switch (mode)
20450 case SFmode:
20451 case DFmode:
20452 case XFmode:
20453 case QImode:
20454 case HImode:
20455 case SImode:
20456 simple:
20457 tmp = ix86_expand_compare (code, op0, op1);
20458 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20459 gen_rtx_LABEL_REF (VOIDmode, label),
20460 pc_rtx);
20461 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20462 return;
20464 case DImode:
20465 if (TARGET_64BIT)
20466 goto simple;
20467 case TImode:
20468 /* Expand DImode branch into multiple compare+branch. */
20470 rtx lo[2], hi[2];
20471 rtx_code_label *label2;
20472 enum rtx_code code1, code2, code3;
20473 machine_mode submode;
20475 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20477 std::swap (op0, op1);
20478 code = swap_condition (code);
20481 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20482 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20484 submode = mode == DImode ? SImode : DImode;
20486 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20487 avoid two branches. This costs one extra insn, so disable when
20488 optimizing for size. */
20490 if ((code == EQ || code == NE)
20491 && (!optimize_insn_for_size_p ()
20492 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20494 rtx xor0, xor1;
20496 xor1 = hi[0];
20497 if (hi[1] != const0_rtx)
20498 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20499 NULL_RTX, 0, OPTAB_WIDEN);
20501 xor0 = lo[0];
20502 if (lo[1] != const0_rtx)
20503 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20504 NULL_RTX, 0, OPTAB_WIDEN);
20506 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20507 NULL_RTX, 0, OPTAB_WIDEN);
20509 ix86_expand_branch (code, tmp, const0_rtx, label);
20510 return;
20513 /* Otherwise, if we are doing less-than or greater-or-equal-than,
20514 op1 is a constant and the low word is zero, then we can just
20515 examine the high word. Similarly for low word -1 and
20516 less-or-equal-than or greater-than. */
20518 if (CONST_INT_P (hi[1]))
20519 switch (code)
20521 case LT: case LTU: case GE: case GEU:
20522 if (lo[1] == const0_rtx)
20524 ix86_expand_branch (code, hi[0], hi[1], label);
20525 return;
20527 break;
20528 case LE: case LEU: case GT: case GTU:
20529 if (lo[1] == constm1_rtx)
20531 ix86_expand_branch (code, hi[0], hi[1], label);
20532 return;
20534 break;
20535 default:
20536 break;
20539 /* Otherwise, we need two or three jumps. */
20541 label2 = gen_label_rtx ();
20543 code1 = code;
20544 code2 = swap_condition (code);
20545 code3 = unsigned_condition (code);
20547 switch (code)
20549 case LT: case GT: case LTU: case GTU:
20550 break;
20552 case LE: code1 = LT; code2 = GT; break;
20553 case GE: code1 = GT; code2 = LT; break;
20554 case LEU: code1 = LTU; code2 = GTU; break;
20555 case GEU: code1 = GTU; code2 = LTU; break;
20557 case EQ: code1 = UNKNOWN; code2 = NE; break;
20558 case NE: code2 = UNKNOWN; break;
20560 default:
20561 gcc_unreachable ();
20565 * a < b =>
20566 * if (hi(a) < hi(b)) goto true;
20567 * if (hi(a) > hi(b)) goto false;
20568 * if (lo(a) < lo(b)) goto true;
20569 * false:
20572 if (code1 != UNKNOWN)
20573 ix86_expand_branch (code1, hi[0], hi[1], label);
20574 if (code2 != UNKNOWN)
20575 ix86_expand_branch (code2, hi[0], hi[1], label2);
20577 ix86_expand_branch (code3, lo[0], lo[1], label);
20579 if (code2 != UNKNOWN)
20580 emit_label (label2);
20581 return;
20584 default:
20585 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20586 goto simple;
20590 /* Split a branch based on a floating point condition.  */
20591 void
20592 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20593 rtx target1, rtx target2, rtx tmp)
20595 rtx condition;
20596 rtx i;
20598 if (target2 != pc_rtx)
20600 std::swap (target1, target2);
20601 code = reverse_condition_maybe_unordered (code);
20604 condition = ix86_expand_fp_compare (code, op1, op2,
20605 tmp);
20607 i = emit_jump_insn (gen_rtx_SET
20608 (VOIDmode, pc_rtx,
20609 gen_rtx_IF_THEN_ELSE (VOIDmode,
20610 condition, target1, target2)));
20611 if (split_branch_probability >= 0)
20612 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20615 void
20616 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20618 rtx ret;
20620 gcc_assert (GET_MODE (dest) == QImode);
20622 ret = ix86_expand_compare (code, op0, op1);
20623 PUT_MODE (ret, QImode);
20624 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20627 /* Expand a comparison setting or clearing the carry flag.  Return true when
20628    successful and set *POP for the operation.  */
20629 static bool
20630 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20632 machine_mode mode =
20633 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20635 /* Do not handle double-mode compares, which go through a special path. */
20636 if (mode == (TARGET_64BIT ? TImode : DImode))
20637 return false;
20639 if (SCALAR_FLOAT_MODE_P (mode))
20641 rtx compare_op;
20642 rtx_insn *compare_seq;
20644 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20646 /* Shortcut: the following common codes never translate
20647 into carry-flag compares. */
20648 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20649 || code == ORDERED || code == UNORDERED)
20650 return false;
20652 /* These comparisons require the zero flag; swap the operands so they won't. */
20653 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20654 && !TARGET_IEEE_FP)
20656 std::swap (op0, op1);
20657 code = swap_condition (code);
20660 /* Try to expand the comparison and verify that we end up with a
20661 carry-flag-based comparison. This fails to be true only when
20662 we decide to expand the comparison using arithmetic, which is
20663 not a common scenario. */
20664 start_sequence ();
20665 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20666 compare_seq = get_insns ();
20667 end_sequence ();
20669 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20670 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20671 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20672 else
20673 code = GET_CODE (compare_op);
20675 if (code != LTU && code != GEU)
20676 return false;
20678 emit_insn (compare_seq);
20679 *pop = compare_op;
20680 return true;
20683 if (!INTEGRAL_MODE_P (mode))
20684 return false;
20686 switch (code)
20688 case LTU:
20689 case GEU:
20690 break;
20692 /* Convert a==0 into (unsigned)a<1. */
20693 case EQ:
20694 case NE:
20695 if (op1 != const0_rtx)
20696 return false;
20697 op1 = const1_rtx;
20698 code = (code == EQ ? LTU : GEU);
20699 break;
20701 /* Convert a>b into b<a or a>=b-1. */
20702 case GTU:
20703 case LEU:
20704 if (CONST_INT_P (op1))
20706 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20707 /* Bail out on overflow. We can still swap the operands, but that
20708 would force loading the constant into a register. */
20709 if (op1 == const0_rtx
20710 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20711 return false;
20712 code = (code == GTU ? GEU : LTU);
20714 else
20716 std::swap (op0, op1);
20717 code = (code == GTU ? LTU : GEU);
20719 break;
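/* For example (illustrative), the unsigned compare a > 41 becomes
   a >=u 42 with the constant incremented, while a > b with a
   non-constant b is rewritten as b < a.  */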
20721 /* Convert a>=0 into (unsigned)a<0x80000000. */
20722 case LT:
20723 case GE:
20724 if (mode == DImode || op1 != const0_rtx)
20725 return false;
20726 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20727 code = (code == LT ? GEU : LTU);
20728 break;
20729 case LE:
20730 case GT:
20731 if (mode == DImode || op1 != constm1_rtx)
20732 return false;
20733 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20734 code = (code == LE ? GEU : LTU);
20735 break;
20737 default:
20738 return false;
20740 /* Swapping the operands may cause a constant to appear as the first operand. */
20741 if (!nonimmediate_operand (op0, VOIDmode))
20743 if (!can_create_pseudo_p ())
20744 return false;
20745 op0 = force_reg (mode, op0);
20747 *pop = ix86_expand_compare (code, op0, op1);
20748 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20749 return true;
20752 bool
20753 ix86_expand_int_movcc (rtx operands[])
20755 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20756 rtx_insn *compare_seq;
20757 rtx compare_op;
20758 machine_mode mode = GET_MODE (operands[0]);
20759 bool sign_bit_compare_p = false;
20760 rtx op0 = XEXP (operands[1], 0);
20761 rtx op1 = XEXP (operands[1], 1);
20763 if (GET_MODE (op0) == TImode
20764 || (GET_MODE (op0) == DImode
20765 && !TARGET_64BIT))
20766 return false;
20768 start_sequence ();
20769 compare_op = ix86_expand_compare (code, op0, op1);
20770 compare_seq = get_insns ();
20771 end_sequence ();
20773 compare_code = GET_CODE (compare_op);
20775 if ((op1 == const0_rtx && (code == GE || code == LT))
20776 || (op1 == constm1_rtx && (code == GT || code == LE)))
20777 sign_bit_compare_p = true;
20779 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20780 HImode insns, we'd be swallowed in word prefix ops. */
20782 if ((mode != HImode || TARGET_FAST_PREFIX)
20783 && (mode != (TARGET_64BIT ? TImode : DImode))
20784 && CONST_INT_P (operands[2])
20785 && CONST_INT_P (operands[3]))
20787 rtx out = operands[0];
20788 HOST_WIDE_INT ct = INTVAL (operands[2]);
20789 HOST_WIDE_INT cf = INTVAL (operands[3]);
20790 HOST_WIDE_INT diff;
20792 diff = ct - cf;
20793 /* Sign-bit compares are better done using shifts than by using
20794 sbb. */
20795 if (sign_bit_compare_p
20796 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20798 /* Detect overlap between destination and compare sources. */
20799 rtx tmp = out;
20801 if (!sign_bit_compare_p)
20803 rtx flags;
20804 bool fpcmp = false;
20806 compare_code = GET_CODE (compare_op);
20808 flags = XEXP (compare_op, 0);
20810 if (GET_MODE (flags) == CCFPmode
20811 || GET_MODE (flags) == CCFPUmode)
20813 fpcmp = true;
20814 compare_code
20815 = ix86_fp_compare_code_to_integer (compare_code);
20818 /* To simplify the rest of the code, restrict to the GEU case. */
20819 if (compare_code == LTU)
20821 std::swap (ct, cf);
20822 compare_code = reverse_condition (compare_code);
20823 code = reverse_condition (code);
20825 else
20827 if (fpcmp)
20828 PUT_CODE (compare_op,
20829 reverse_condition_maybe_unordered
20830 (GET_CODE (compare_op)));
20831 else
20832 PUT_CODE (compare_op,
20833 reverse_condition (GET_CODE (compare_op)));
20835 diff = ct - cf;
20837 if (reg_overlap_mentioned_p (out, op0)
20838 || reg_overlap_mentioned_p (out, op1))
20839 tmp = gen_reg_rtx (mode);
20841 if (mode == DImode)
20842 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20843 else
20844 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20845 flags, compare_op));
20847 else
20849 if (code == GT || code == GE)
20850 code = reverse_condition (code);
20851 else
20853 std::swap (ct, cf);
20854 diff = ct - cf;
20856 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20859 if (diff == 1)
20862 * cmpl op0,op1
20863 * sbbl dest,dest
20864 * [addl dest, ct]
20866 * Size 5 - 8.
20868 if (ct)
20869 tmp = expand_simple_binop (mode, PLUS,
20870 tmp, GEN_INT (ct),
20871 copy_rtx (tmp), 1, OPTAB_DIRECT);
20873 else if (cf == -1)
20876 * cmpl op0,op1
20877 * sbbl dest,dest
20878 * orl $ct, dest
20880 * Size 8.
20882 tmp = expand_simple_binop (mode, IOR,
20883 tmp, GEN_INT (ct),
20884 copy_rtx (tmp), 1, OPTAB_DIRECT);
20886 else if (diff == -1 && ct)
20889 * cmpl op0,op1
20890 * sbbl dest,dest
20891 * notl dest
20892 * [addl dest, cf]
20894 * Size 8 - 11.
20896 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20897 if (cf)
20898 tmp = expand_simple_binop (mode, PLUS,
20899 copy_rtx (tmp), GEN_INT (cf),
20900 copy_rtx (tmp), 1, OPTAB_DIRECT);
20902 else
20905 * cmpl op0,op1
20906 * sbbl dest,dest
20907 * [notl dest]
20908 * andl cf - ct, dest
20909 * [addl dest, ct]
20911 * Size 8 - 11.
20914 if (cf == 0)
20916 cf = ct;
20917 ct = 0;
20918 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20921 tmp = expand_simple_binop (mode, AND,
20922 copy_rtx (tmp),
20923 gen_int_mode (cf - ct, mode),
20924 copy_rtx (tmp), 1, OPTAB_DIRECT);
20925 if (ct)
20926 tmp = expand_simple_binop (mode, PLUS,
20927 copy_rtx (tmp), GEN_INT (ct),
20928 copy_rtx (tmp), 1, OPTAB_DIRECT);
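/* Worked example of the general case, assuming ct = 5 and cf = 2:
   the sbb leaves dest equal to -1 or 0, AND with (cf - ct) = -3 gives
   -3 or 0, and adding ct = 5 yields 2 or 5, i.e. cf or ct.  */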
20931 if (!rtx_equal_p (tmp, out))
20932 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20934 return true;
20937 if (diff < 0)
20939 machine_mode cmp_mode = GET_MODE (op0);
20940 enum rtx_code new_code;
20942 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20944 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20946 /* We may be reversing an unordered compare to a normal compare, which
20947 is not valid in general (we may convert a non-trapping condition
20948 to a trapping one); however, on i386 we currently emit all
20949 comparisons unordered. */
20950 new_code = reverse_condition_maybe_unordered (code);
20952 else
20953 new_code = ix86_reverse_condition (code, cmp_mode);
20954 if (new_code != UNKNOWN)
20956 std::swap (ct, cf);
20957 diff = -diff;
20958 code = new_code;
20962 compare_code = UNKNOWN;
20963 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20964 && CONST_INT_P (op1))
20966 if (op1 == const0_rtx
20967 && (code == LT || code == GE))
20968 compare_code = code;
20969 else if (op1 == constm1_rtx)
20971 if (code == LE)
20972 compare_code = LT;
20973 else if (code == GT)
20974 compare_code = GE;
20978 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20979 if (compare_code != UNKNOWN
20980 && GET_MODE (op0) == GET_MODE (out)
20981 && (cf == -1 || ct == -1))
20983 /* If lea code below could be used, only optimize
20984 if it results in a 2 insn sequence. */
20986 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20987 || diff == 3 || diff == 5 || diff == 9)
20988 || (compare_code == LT && ct == -1)
20989 || (compare_code == GE && cf == -1))
20992 * notl op1 (if necessary)
20993 * sarl $31, op1
20994 * orl cf, op1
20996 if (ct != -1)
20998 cf = ct;
20999 ct = -1;
21000 code = reverse_condition (code);
21003 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21005 out = expand_simple_binop (mode, IOR,
21006 out, GEN_INT (cf),
21007 out, 1, OPTAB_DIRECT);
21008 if (out != operands[0])
21009 emit_move_insn (operands[0], out);
21011 return true;
21016 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
21017 || diff == 3 || diff == 5 || diff == 9)
21018 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
21019 && (mode != DImode
21020 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
21023 * xorl dest,dest
21024 * cmpl op1,op2
21025 * setcc dest
21026 * lea cf(dest*(ct-cf)),dest
21028 * Size 14.
21030 * This also catches the degenerate setcc-only case.
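/* Illustrative instance, assuming ct = 7 and cf = 4 (diff = 3): setcc
   leaves dest equal to 0 or 1, and the lea computes 4 + dest*3
   (e.g. lea 4(%reg,%reg,2)), i.e. cf or ct.  */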
21033 rtx tmp;
21034 int nops;
21036 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21038 nops = 0;
21039 /* On x86_64 the lea instruction operates on Pmode, so we need
21040 the arithmetic done in the proper mode to match. */
21041 if (diff == 1)
21042 tmp = copy_rtx (out);
21043 else
21045 rtx out1;
21046 out1 = copy_rtx (out);
21047 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
21048 nops++;
21049 if (diff & 1)
21051 tmp = gen_rtx_PLUS (mode, tmp, out1);
21052 nops++;
21055 if (cf != 0)
21057 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
21058 nops++;
21060 if (!rtx_equal_p (tmp, out))
21062 if (nops == 1)
21063 out = force_operand (tmp, copy_rtx (out));
21064 else
21065 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
21067 if (!rtx_equal_p (out, operands[0]))
21068 emit_move_insn (operands[0], copy_rtx (out));
21070 return true;
21074 * General case: Jumpful:
21075 * xorl dest,dest cmpl op1, op2
21076 * cmpl op1, op2 movl ct, dest
21077 * setcc dest jcc 1f
21078 * decl dest movl cf, dest
21079 * andl (cf-ct),dest 1:
21080 * addl ct,dest
21082 * Size 20. Size 14.
21084 * This is reasonably steep, but branch mispredict costs are
21085 * high on modern cpus, so consider failing only if optimizing
21086 * for space.
21089 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21090 && BRANCH_COST (optimize_insn_for_speed_p (),
21091 false) >= 2)
21093 if (cf == 0)
21095 machine_mode cmp_mode = GET_MODE (op0);
21096 enum rtx_code new_code;
21098 if (SCALAR_FLOAT_MODE_P (cmp_mode))
21100 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21102 /* We may be reversing an unordered compare to a normal compare,
21103 which is not valid in general (we may convert a non-trapping
21104 condition to a trapping one); however, on i386 we currently
21105 emit all comparisons unordered. */
21106 new_code = reverse_condition_maybe_unordered (code);
21108 else
21110 new_code = ix86_reverse_condition (code, cmp_mode);
21111 if (compare_code != UNKNOWN && new_code != UNKNOWN)
21112 compare_code = reverse_condition (compare_code);
21115 if (new_code != UNKNOWN)
21117 cf = ct;
21118 ct = 0;
21119 code = new_code;
21123 if (compare_code != UNKNOWN)
21125 /* notl op1 (if needed)
21126 sarl $31, op1
21127 andl (cf-ct), op1
21128 addl ct, op1
21130 For x < 0 (resp. x <= -1) there will be no notl,
21131 so if possible swap the constants to get rid of the
21132 complement.
21133 True/false will be -1/0 while code below (store flag
21134 followed by decrement) is 0/-1, so the constants need
21135 to be exchanged once more. */
21137 if (compare_code == GE || !cf)
21139 code = reverse_condition (code);
21140 compare_code = LT;
21142 else
21143 std::swap (ct, cf);
21145 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21147 else
21149 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21151 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21152 constm1_rtx,
21153 copy_rtx (out), 1, OPTAB_DIRECT);
21156 out = expand_simple_binop (mode, AND, copy_rtx (out),
21157 gen_int_mode (cf - ct, mode),
21158 copy_rtx (out), 1, OPTAB_DIRECT);
21159 if (ct)
21160 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21161 copy_rtx (out), 1, OPTAB_DIRECT);
21162 if (!rtx_equal_p (out, operands[0]))
21163 emit_move_insn (operands[0], copy_rtx (out));
21165 return true;
21169 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21171 /* Try a few things more with specific constants and a variable. */
21173 optab op;
21174 rtx var, orig_out, out, tmp;
21176 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21177 return false;
21179 /* If one of the two source operands is an interesting constant (0 or -1),
21180 recurse to load a 0/-1 mask and mask the variable operand in with a logical operation. */
21182 if (CONST_INT_P (operands[2]))
21184 var = operands[3];
21185 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21186 operands[3] = constm1_rtx, op = and_optab;
21187 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21188 operands[3] = const0_rtx, op = ior_optab;
21189 else
21190 return false;
21192 else if (CONST_INT_P (operands[3]))
21194 var = operands[2];
21195 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21196 operands[2] = constm1_rtx, op = and_optab;
21197 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
21198 operands[2] = const0_rtx, op = ior_optab;
21199 else
21200 return false;
21202 else
21203 return false;
21205 orig_out = operands[0];
21206 tmp = gen_reg_rtx (mode);
21207 operands[0] = tmp;
21209 /* Recurse to get the constant loaded. */
21210 if (ix86_expand_int_movcc (operands) == 0)
21211 return false;
21213 /* Mask in the interesting variable. */
21214 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21215 OPTAB_WIDEN);
21216 if (!rtx_equal_p (out, orig_out))
21217 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21219 return true;
21223 * For comparison with above,
21225 * movl cf,dest
21226 * movl ct,tmp
21227 * cmpl op1,op2
21228 * cmovcc tmp,dest
21230 * Size 15.
21233 if (! nonimmediate_operand (operands[2], mode))
21234 operands[2] = force_reg (mode, operands[2]);
21235 if (! nonimmediate_operand (operands[3], mode))
21236 operands[3] = force_reg (mode, operands[3]);
21238 if (! register_operand (operands[2], VOIDmode)
21239 && (mode == QImode
21240 || ! register_operand (operands[3], VOIDmode)))
21241 operands[2] = force_reg (mode, operands[2]);
21243 if (mode == QImode
21244 && ! register_operand (operands[3], VOIDmode))
21245 operands[3] = force_reg (mode, operands[3]);
21247 emit_insn (compare_seq);
21248 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21249 gen_rtx_IF_THEN_ELSE (mode,
21250 compare_op, operands[2],
21251 operands[3])));
21252 return true;
21255 /* Swap, force into registers, or otherwise massage the two operands
21256 to an sse comparison with a mask result. Thus we differ a bit from
21257 ix86_prepare_fp_compare_args which expects to produce a flags result.
21259 The DEST operand exists to help determine whether to commute commutative
21260 operators. The POP0/POP1 operands are updated in place. The new
21261 comparison code is returned, or UNKNOWN if not implementable. */
21263 static enum rtx_code
21264 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21265 rtx *pop0, rtx *pop1)
21267 switch (code)
21269 case LTGT:
21270 case UNEQ:
21271 /* AVX supports all the needed comparisons. */
21272 if (TARGET_AVX)
21273 break;
21274 /* We have no LTGT as an operator. We could implement it with
21275 NE & ORDERED, but this requires an extra temporary. It's
21276 not clear that it's worth it. */
21277 return UNKNOWN;
21279 case LT:
21280 case LE:
21281 case UNGT:
21282 case UNGE:
21283 /* These are supported directly. */
21284 break;
21286 case EQ:
21287 case NE:
21288 case UNORDERED:
21289 case ORDERED:
21290 /* AVX has 3 operand comparisons, no need to swap anything. */
21291 if (TARGET_AVX)
21292 break;
21293 /* For commutative operators, try to canonicalize the destination
21294 operand to be first in the comparison - this helps reload to
21295 avoid extra moves. */
21296 if (!dest || !rtx_equal_p (dest, *pop1))
21297 break;
21298 /* FALLTHRU */
21300 case GE:
21301 case GT:
21302 case UNLE:
21303 case UNLT:
21304 /* These are not supported directly before AVX, and furthermore
21305 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21306 comparison operands to transform into something that is
21307 supported. */
21308 std::swap (*pop0, *pop1);
21309 code = swap_condition (code);
21310 break;
21312 default:
21313 gcc_unreachable ();
21316 return code;
21319 /* Detect conditional moves that exactly match min/max operational
21320 semantics. Note that this is IEEE safe, as long as we don't
21321 interchange the operands.
21323 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21324 and TRUE if the operation is successful and instructions are emitted. */
21326 static bool
21327 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21328 rtx cmp_op1, rtx if_true, rtx if_false)
21330 machine_mode mode;
21331 bool is_min;
21332 rtx tmp;
21334 if (code == LT)
21336 else if (code == UNGE)
21337 std::swap (if_true, if_false);
21338 else
21339 return false;
21341 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21342 is_min = true;
21343 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21344 is_min = false;
21345 else
21346 return false;
21348 mode = GET_MODE (dest);
21350 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21351 but MODE may be a vector mode and thus not appropriate. */
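/* The unspec form is used here because the SSE min/max instructions are
   not symmetric: when the operands are unordered (a NaN is present) or
   both are zero, the second source operand is returned, so the operand
   order chosen above must not be commuted by later passes.  */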
21352 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21354 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21355 rtvec v;
21357 if_true = force_reg (mode, if_true);
21358 v = gen_rtvec (2, if_true, if_false);
21359 tmp = gen_rtx_UNSPEC (mode, v, u);
21361 else
21363 code = is_min ? SMIN : SMAX;
21364 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21367 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
21368 return true;
21371 /* Expand an sse vector comparison. Return the register with the result. */
21373 static rtx
21374 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21375 rtx op_true, rtx op_false)
21377 machine_mode mode = GET_MODE (dest);
21378 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21380 /* In the general case the result of the comparison can differ from the operands' type. */
21381 machine_mode cmp_mode;
21383 /* In AVX512F the result of comparison is an integer mask. */
21384 bool maskcmp = false;
21385 rtx x;
21387 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21389 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21390 gcc_assert (cmp_mode != BLKmode);
21392 maskcmp = true;
21394 else
21395 cmp_mode = cmp_ops_mode;
21398 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21399 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21400 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21402 if (optimize
21403 || reg_overlap_mentioned_p (dest, op_true)
21404 || reg_overlap_mentioned_p (dest, op_false))
21405 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21407 /* Compare patterns for int modes are unspec in AVX512F only. */
21408 if (maskcmp && (code == GT || code == EQ))
21410 rtx (*gen)(rtx, rtx, rtx);
21412 switch (cmp_ops_mode)
21414 case V64QImode:
21415 gcc_assert (TARGET_AVX512BW);
21416 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
21417 break;
21418 case V32HImode:
21419 gcc_assert (TARGET_AVX512BW);
21420 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
21421 break;
21422 case V16SImode:
21423 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21424 break;
21425 case V8DImode:
21426 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21427 break;
21428 default:
21429 gen = NULL;
21432 if (gen)
21434 emit_insn (gen (dest, cmp_op0, cmp_op1));
21435 return dest;
21438 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21440 if (cmp_mode != mode && !maskcmp)
21442 x = force_reg (cmp_ops_mode, x);
21443 convert_move (dest, x, false);
21445 else
21446 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21448 return dest;
21451 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21452 operations. This is used for both scalar and vector conditional moves. */
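/* When no blend or vpcmov form applies, the fallback at the end computes
   dest = (cmp & op_true) | (~cmp & op_false).  */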
21454 static void
21455 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21457 machine_mode mode = GET_MODE (dest);
21458 machine_mode cmpmode = GET_MODE (cmp);
21460 /* In AVX512F the result of comparison is an integer mask. */
21461 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21463 rtx t2, t3, x;
21465 if (vector_all_ones_operand (op_true, mode)
21466 && rtx_equal_p (op_false, CONST0_RTX (mode))
21467 && !maskcmp)
21469 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
21471 else if (op_false == CONST0_RTX (mode)
21472 && !maskcmp)
21474 op_true = force_reg (mode, op_true);
21475 x = gen_rtx_AND (mode, cmp, op_true);
21476 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21478 else if (op_true == CONST0_RTX (mode)
21479 && !maskcmp)
21481 op_false = force_reg (mode, op_false);
21482 x = gen_rtx_NOT (mode, cmp);
21483 x = gen_rtx_AND (mode, x, op_false);
21484 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21486 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21487 && !maskcmp)
21489 op_false = force_reg (mode, op_false);
21490 x = gen_rtx_IOR (mode, cmp, op_false);
21491 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21493 else if (TARGET_XOP
21494 && !maskcmp)
21496 op_true = force_reg (mode, op_true);
21498 if (!nonimmediate_operand (op_false, mode))
21499 op_false = force_reg (mode, op_false);
21501 emit_insn (gen_rtx_SET (mode, dest,
21502 gen_rtx_IF_THEN_ELSE (mode, cmp,
21503 op_true,
21504 op_false)));
21506 else
21508 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21509 rtx d = dest;
21511 if (!nonimmediate_operand (op_true, mode))
21512 op_true = force_reg (mode, op_true);
21514 op_false = force_reg (mode, op_false);
21516 switch (mode)
21518 case V4SFmode:
21519 if (TARGET_SSE4_1)
21520 gen = gen_sse4_1_blendvps;
21521 break;
21522 case V2DFmode:
21523 if (TARGET_SSE4_1)
21524 gen = gen_sse4_1_blendvpd;
21525 break;
21526 case V16QImode:
21527 case V8HImode:
21528 case V4SImode:
21529 case V2DImode:
21530 if (TARGET_SSE4_1)
21532 gen = gen_sse4_1_pblendvb;
21533 if (mode != V16QImode)
21534 d = gen_reg_rtx (V16QImode);
21535 op_false = gen_lowpart (V16QImode, op_false);
21536 op_true = gen_lowpart (V16QImode, op_true);
21537 cmp = gen_lowpart (V16QImode, cmp);
21539 break;
21540 case V8SFmode:
21541 if (TARGET_AVX)
21542 gen = gen_avx_blendvps256;
21543 break;
21544 case V4DFmode:
21545 if (TARGET_AVX)
21546 gen = gen_avx_blendvpd256;
21547 break;
21548 case V32QImode:
21549 case V16HImode:
21550 case V8SImode:
21551 case V4DImode:
21552 if (TARGET_AVX2)
21554 gen = gen_avx2_pblendvb;
21555 if (mode != V32QImode)
21556 d = gen_reg_rtx (V32QImode);
21557 op_false = gen_lowpart (V32QImode, op_false);
21558 op_true = gen_lowpart (V32QImode, op_true);
21559 cmp = gen_lowpart (V32QImode, cmp);
21561 break;
21563 case V64QImode:
21564 gen = gen_avx512bw_blendmv64qi;
21565 break;
21566 case V32HImode:
21567 gen = gen_avx512bw_blendmv32hi;
21568 break;
21569 case V16SImode:
21570 gen = gen_avx512f_blendmv16si;
21571 break;
21572 case V8DImode:
21573 gen = gen_avx512f_blendmv8di;
21574 break;
21575 case V8DFmode:
21576 gen = gen_avx512f_blendmv8df;
21577 break;
21578 case V16SFmode:
21579 gen = gen_avx512f_blendmv16sf;
21580 break;
21582 default:
21583 break;
21586 if (gen != NULL)
21588 emit_insn (gen (d, op_false, op_true, cmp));
21589 if (d != dest)
21590 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21592 else
21594 op_true = force_reg (mode, op_true);
21596 t2 = gen_reg_rtx (mode);
21597 if (optimize)
21598 t3 = gen_reg_rtx (mode);
21599 else
21600 t3 = dest;
21602 x = gen_rtx_AND (mode, op_true, cmp);
21603 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21605 x = gen_rtx_NOT (mode, cmp);
21606 x = gen_rtx_AND (mode, x, op_false);
21607 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21609 x = gen_rtx_IOR (mode, t3, t2);
21610 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21615 /* Expand a floating-point conditional move. Return true if successful. */
21617 bool
21618 ix86_expand_fp_movcc (rtx operands[])
21620 machine_mode mode = GET_MODE (operands[0]);
21621 enum rtx_code code = GET_CODE (operands[1]);
21622 rtx tmp, compare_op;
21623 rtx op0 = XEXP (operands[1], 0);
21624 rtx op1 = XEXP (operands[1], 1);
21626 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21628 machine_mode cmode;
21630 /* Since we've no cmove for sse registers, don't force bad register
21631 allocation just to gain access to it. Deny movcc when the
21632 comparison mode doesn't match the move mode. */
21633 cmode = GET_MODE (op0);
21634 if (cmode == VOIDmode)
21635 cmode = GET_MODE (op1);
21636 if (cmode != mode)
21637 return false;
21639 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21640 if (code == UNKNOWN)
21641 return false;
21643 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21644 operands[2], operands[3]))
21645 return true;
21647 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21648 operands[2], operands[3]);
21649 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21650 return true;
21653 if (GET_MODE (op0) == TImode
21654 || (GET_MODE (op0) == DImode
21655 && !TARGET_64BIT))
21656 return false;
21658 /* The floating point conditional move instructions don't directly
21659 support conditions resulting from a signed integer comparison. */
21661 compare_op = ix86_expand_compare (code, op0, op1);
21662 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21664 tmp = gen_reg_rtx (QImode);
21665 ix86_expand_setcc (tmp, code, op0, op1);
21667 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21670 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21671 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21672 operands[2], operands[3])));
21674 return true;
21677 /* Expand a floating-point vector conditional move; a vcond operation
21678 rather than a movcc operation. */
21680 bool
21681 ix86_expand_fp_vcond (rtx operands[])
21683 enum rtx_code code = GET_CODE (operands[3]);
21684 rtx cmp;
21686 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21687 &operands[4], &operands[5]);
21688 if (code == UNKNOWN)
21690 rtx temp;
21691 switch (GET_CODE (operands[3]))
21693 case LTGT:
21694 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21695 operands[5], operands[0], operands[0]);
21696 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21697 operands[5], operands[1], operands[2]);
21698 code = AND;
21699 break;
21700 case UNEQ:
21701 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21702 operands[5], operands[0], operands[0]);
21703 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21704 operands[5], operands[1], operands[2]);
21705 code = IOR;
21706 break;
21707 default:
21708 gcc_unreachable ();
21710 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21711 OPTAB_DIRECT);
21712 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21713 return true;
21716 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21717 operands[5], operands[1], operands[2]))
21718 return true;
21720 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21721 operands[1], operands[2]);
21722 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21723 return true;
21726 /* Expand a signed/unsigned integral vector conditional move. */
21728 bool
21729 ix86_expand_int_vcond (rtx operands[])
21731 machine_mode data_mode = GET_MODE (operands[0]);
21732 machine_mode mode = GET_MODE (operands[4]);
21733 enum rtx_code code = GET_CODE (operands[3]);
21734 bool negate = false;
21735 rtx x, cop0, cop1;
21737 cop0 = operands[4];
21738 cop1 = operands[5];
21740 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21741 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21742 if ((code == LT || code == GE)
21743 && data_mode == mode
21744 && cop1 == CONST0_RTX (mode)
21745 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21746 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21747 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21748 && (GET_MODE_SIZE (data_mode) == 16
21749 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21751 rtx negop = operands[2 - (code == LT)];
21752 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21753 if (negop == CONST1_RTX (data_mode))
21755 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21756 operands[0], 1, OPTAB_DIRECT);
21757 if (res != operands[0])
21758 emit_move_insn (operands[0], res);
21759 return true;
21761 else if (GET_MODE_INNER (data_mode) != DImode
21762 && vector_all_ones_operand (negop, data_mode))
21764 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21765 operands[0], 0, OPTAB_DIRECT);
21766 if (res != operands[0])
21767 emit_move_insn (operands[0], res);
21768 return true;
21772 if (!nonimmediate_operand (cop1, mode))
21773 cop1 = force_reg (mode, cop1);
21774 if (!general_operand (operands[1], data_mode))
21775 operands[1] = force_reg (data_mode, operands[1]);
21776 if (!general_operand (operands[2], data_mode))
21777 operands[2] = force_reg (data_mode, operands[2]);
21779 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21780 if (TARGET_XOP
21781 && (mode == V16QImode || mode == V8HImode
21782 || mode == V4SImode || mode == V2DImode))
21784 else
21786 /* Canonicalize the comparison to EQ, GT, GTU. */
21787 switch (code)
21789 case EQ:
21790 case GT:
21791 case GTU:
21792 break;
21794 case NE:
21795 case LE:
21796 case LEU:
21797 code = reverse_condition (code);
21798 negate = true;
21799 break;
21801 case GE:
21802 case GEU:
21803 code = reverse_condition (code);
21804 negate = true;
21805 /* FALLTHRU */
21807 case LT:
21808 case LTU:
21809 std::swap (cop0, cop1);
21810 code = swap_condition (code);
21811 break;
21813 default:
21814 gcc_unreachable ();
21817 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21818 if (mode == V2DImode)
21820 switch (code)
21822 case EQ:
21823 /* SSE4.1 supports EQ. */
21824 if (!TARGET_SSE4_1)
21825 return false;
21826 break;
21828 case GT:
21829 case GTU:
21830 /* SSE4.2 supports GT/GTU. */
21831 if (!TARGET_SSE4_2)
21832 return false;
21833 break;
21835 default:
21836 gcc_unreachable ();
21840 /* Unsigned parallel compare is not supported by the hardware.
21841 Play some tricks to turn this into a signed comparison
21842 against 0. */
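/* For example, with 32-bit elements a >u b holds iff
   (a - 0x80000000) >s (b - 0x80000000) in modular arithmetic, so biasing
   both operands by the sign-bit mask turns GTU into plain GT.  */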
21843 if (code == GTU)
21845 cop0 = force_reg (mode, cop0);
21847 switch (mode)
21849 case V16SImode:
21850 case V8DImode:
21851 case V8SImode:
21852 case V4DImode:
21853 case V4SImode:
21854 case V2DImode:
21856 rtx t1, t2, mask;
21857 rtx (*gen_sub3) (rtx, rtx, rtx);
21859 switch (mode)
21861 case V16SImode: gen_sub3 = gen_subv16si3; break;
21862 case V8DImode: gen_sub3 = gen_subv8di3; break;
21863 case V8SImode: gen_sub3 = gen_subv8si3; break;
21864 case V4DImode: gen_sub3 = gen_subv4di3; break;
21865 case V4SImode: gen_sub3 = gen_subv4si3; break;
21866 case V2DImode: gen_sub3 = gen_subv2di3; break;
21867 default:
21868 gcc_unreachable ();
21870 /* Subtract (-(INT MAX) - 1) from both operands to make
21871 them signed. */
21872 mask = ix86_build_signbit_mask (mode, true, false);
21873 t1 = gen_reg_rtx (mode);
21874 emit_insn (gen_sub3 (t1, cop0, mask));
21876 t2 = gen_reg_rtx (mode);
21877 emit_insn (gen_sub3 (t2, cop1, mask));
21879 cop0 = t1;
21880 cop1 = t2;
21881 code = GT;
21883 break;
21885 case V64QImode:
21886 case V32HImode:
21887 case V32QImode:
21888 case V16HImode:
21889 case V16QImode:
21890 case V8HImode:
21891 /* Perform a parallel unsigned saturating subtraction. */
21892 x = gen_reg_rtx (mode);
21893 emit_insn (gen_rtx_SET (VOIDmode, x,
21894 gen_rtx_US_MINUS (mode, cop0, cop1)));
21896 cop0 = x;
21897 cop1 = CONST0_RTX (mode);
21898 code = EQ;
21899 negate = !negate;
21900 break;
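/* For these narrow element types the saturating-subtraction trick above
   computes us_sub(a, b) == 0, which is a <=u b, and flips NEGATE so that
   the selected arms correspond to the original a >u b.  */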
21902 default:
21903 gcc_unreachable ();
21908 /* Allow the comparison to be done in one mode, but the movcc to
21909 happen in another mode. */
21910 if (data_mode == mode)
21912 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21913 operands[1+negate], operands[2-negate]);
21915 else
21917 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21918 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21919 operands[1+negate], operands[2-negate]);
21920 if (GET_MODE (x) == mode)
21921 x = gen_lowpart (data_mode, x);
21924 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21925 operands[2-negate]);
21926 return true;
21929 /* AVX512F does support 64-byte integer vector operations,
21930 thus the longest vector we are faced with is V64QImode. */
21931 #define MAX_VECT_LEN 64
21933 struct expand_vec_perm_d
21935 rtx target, op0, op1;
21936 unsigned char perm[MAX_VECT_LEN];
21937 machine_mode vmode;
21938 unsigned char nelt;
21939 bool one_operand_p;
21940 bool testing_p;
21943 static bool
21944 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21945 struct expand_vec_perm_d *d)
21947 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21948 expander, so args are either in d, or in op0, op1 etc. */
21949 machine_mode mode = GET_MODE (d ? d->op0 : op0);
21950 machine_mode maskmode = mode;
21951 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21953 switch (mode)
21955 case V8HImode:
21956 if (TARGET_AVX512VL && TARGET_AVX512BW)
21957 gen = gen_avx512vl_vpermi2varv8hi3;
21958 break;
21959 case V16HImode:
21960 if (TARGET_AVX512VL && TARGET_AVX512BW)
21961 gen = gen_avx512vl_vpermi2varv16hi3;
21962 break;
21963 case V64QImode:
21964 if (TARGET_AVX512VBMI)
21965 gen = gen_avx512bw_vpermi2varv64qi3;
21966 break;
21967 case V32HImode:
21968 if (TARGET_AVX512BW)
21969 gen = gen_avx512bw_vpermi2varv32hi3;
21970 break;
21971 case V4SImode:
21972 if (TARGET_AVX512VL)
21973 gen = gen_avx512vl_vpermi2varv4si3;
21974 break;
21975 case V8SImode:
21976 if (TARGET_AVX512VL)
21977 gen = gen_avx512vl_vpermi2varv8si3;
21978 break;
21979 case V16SImode:
21980 if (TARGET_AVX512F)
21981 gen = gen_avx512f_vpermi2varv16si3;
21982 break;
21983 case V4SFmode:
21984 if (TARGET_AVX512VL)
21986 gen = gen_avx512vl_vpermi2varv4sf3;
21987 maskmode = V4SImode;
21989 break;
21990 case V8SFmode:
21991 if (TARGET_AVX512VL)
21993 gen = gen_avx512vl_vpermi2varv8sf3;
21994 maskmode = V8SImode;
21996 break;
21997 case V16SFmode:
21998 if (TARGET_AVX512F)
22000 gen = gen_avx512f_vpermi2varv16sf3;
22001 maskmode = V16SImode;
22003 break;
22004 case V2DImode:
22005 if (TARGET_AVX512VL)
22006 gen = gen_avx512vl_vpermi2varv2di3;
22007 break;
22008 case V4DImode:
22009 if (TARGET_AVX512VL)
22010 gen = gen_avx512vl_vpermi2varv4di3;
22011 break;
22012 case V8DImode:
22013 if (TARGET_AVX512F)
22014 gen = gen_avx512f_vpermi2varv8di3;
22015 break;
22016 case V2DFmode:
22017 if (TARGET_AVX512VL)
22019 gen = gen_avx512vl_vpermi2varv2df3;
22020 maskmode = V2DImode;
22022 break;
22023 case V4DFmode:
22024 if (TARGET_AVX512VL)
22026 gen = gen_avx512vl_vpermi2varv4df3;
22027 maskmode = V4DImode;
22029 break;
22030 case V8DFmode:
22031 if (TARGET_AVX512F)
22033 gen = gen_avx512f_vpermi2varv8df3;
22034 maskmode = V8DImode;
22036 break;
22037 default:
22038 break;
22041 if (gen == NULL)
22042 return false;
22044 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
22045 expander, so args are either in d, or in op0, op1 etc. */
22046 if (d)
22048 rtx vec[64];
22049 target = d->target;
22050 op0 = d->op0;
22051 op1 = d->op1;
22052 for (int i = 0; i < d->nelt; ++i)
22053 vec[i] = GEN_INT (d->perm[i]);
22054 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
22057 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
22058 return true;
22061 /* Expand a variable vector permutation. */
22063 void
22064 ix86_expand_vec_perm (rtx operands[])
22066 rtx target = operands[0];
22067 rtx op0 = operands[1];
22068 rtx op1 = operands[2];
22069 rtx mask = operands[3];
22070 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
22071 machine_mode mode = GET_MODE (op0);
22072 machine_mode maskmode = GET_MODE (mask);
22073 int w, e, i;
22074 bool one_operand_shuffle = rtx_equal_p (op0, op1);
22076 /* Number of elements in the vector. */
22077 w = GET_MODE_NUNITS (mode);
22078 e = GET_MODE_UNIT_SIZE (mode);
22079 gcc_assert (w <= 64);
22081 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
22082 return;
22084 if (TARGET_AVX2)
22086 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
22088 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
22089 a constant shuffle operand. With a tiny bit of effort we can
22090 use VPERMD instead. A re-interpretation stall for V4DFmode is
22091 unfortunate but there's no avoiding it.
22092 Similarly, for V16HImode we don't have instructions for variable
22093 shuffling, while for V32QImode we can, after preparing suitable
22094 masks, use vpshufb; vpshufb; vpermq; vpor. */
22096 if (mode == V16HImode)
22098 maskmode = mode = V32QImode;
22099 w = 32;
22100 e = 1;
22102 else
22104 maskmode = mode = V8SImode;
22105 w = 8;
22106 e = 4;
22108 t1 = gen_reg_rtx (maskmode);
22110 /* Replicate the low bits of the V4DImode mask into V8SImode:
22111 mask = { A B C D }
22112 t1 = { A A B B C C D D }. */
22113 for (i = 0; i < w / 2; ++i)
22114 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
22115 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22116 vt = force_reg (maskmode, vt);
22117 mask = gen_lowpart (maskmode, mask);
22118 if (maskmode == V8SImode)
22119 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22120 else
22121 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22123 /* Multiply the shuffle indices by two. */
22124 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22125 OPTAB_DIRECT);
22127 /* Add one to the odd shuffle indices:
22128 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22129 for (i = 0; i < w / 2; ++i)
22131 vec[i * 2] = const0_rtx;
22132 vec[i * 2 + 1] = const1_rtx;
22134 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22135 vt = validize_mem (force_const_mem (maskmode, vt));
22136 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22137 OPTAB_DIRECT);
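/* Worked example (illustrative): a V4DImode mask of { 1 3 0 2 } is first
   widened to { 1 1 3 3 0 0 2 2 }, doubled to { 2 2 6 6 0 0 4 4 }, and
   becomes { 2 3 6 7 0 1 4 5 } after the addition, which are exactly the
   V8SImode element indices of the requested 64-bit elements.  */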
22139 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22140 operands[3] = mask = t1;
22141 target = gen_reg_rtx (mode);
22142 op0 = gen_lowpart (mode, op0);
22143 op1 = gen_lowpart (mode, op1);
22146 switch (mode)
22148 case V8SImode:
22149 /* The VPERMD and VPERMPS instructions already properly ignore
22150 the high bits of the shuffle elements. No need for us to
22151 perform an AND ourselves. */
22152 if (one_operand_shuffle)
22154 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22155 if (target != operands[0])
22156 emit_move_insn (operands[0],
22157 gen_lowpart (GET_MODE (operands[0]), target));
22159 else
22161 t1 = gen_reg_rtx (V8SImode);
22162 t2 = gen_reg_rtx (V8SImode);
22163 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22164 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22165 goto merge_two;
22167 return;
22169 case V8SFmode:
22170 mask = gen_lowpart (V8SImode, mask);
22171 if (one_operand_shuffle)
22172 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22173 else
22175 t1 = gen_reg_rtx (V8SFmode);
22176 t2 = gen_reg_rtx (V8SFmode);
22177 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22178 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22179 goto merge_two;
22181 return;
22183 case V4SImode:
22184 /* By combining the two 128-bit input vectors into one 256-bit
22185 input vector, we can use VPERMD and VPERMPS for the full
22186 two-operand shuffle. */
22187 t1 = gen_reg_rtx (V8SImode);
22188 t2 = gen_reg_rtx (V8SImode);
22189 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22190 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22191 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22192 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22193 return;
22195 case V4SFmode:
22196 t1 = gen_reg_rtx (V8SFmode);
22197 t2 = gen_reg_rtx (V8SImode);
22198 mask = gen_lowpart (V4SImode, mask);
22199 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22200 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22201 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22202 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
22203 return;
22205 case V32QImode:
22206 t1 = gen_reg_rtx (V32QImode);
22207 t2 = gen_reg_rtx (V32QImode);
22208 t3 = gen_reg_rtx (V32QImode);
22209 vt2 = GEN_INT (-128);
22210 for (i = 0; i < 32; i++)
22211 vec[i] = vt2;
22212 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22213 vt = force_reg (V32QImode, vt);
22214 for (i = 0; i < 32; i++)
22215 vec[i] = i < 16 ? vt2 : const0_rtx;
22216 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22217 vt2 = force_reg (V32QImode, vt2);
22218 /* From mask create two adjusted masks, which contain the same
22219 bits as mask in the low 7 bits of each vector element.
22220 The first mask will have the most significant bit clear
22221 if it requests element from the same 128-bit lane
22222 and MSB set if it requests element from the other 128-bit lane.
22223 The second mask will have the opposite values of the MSB,
22224 and additionally will have its 128-bit lanes swapped.
22225 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22226 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22227 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22228 stands for the other 12 bytes. */
22229 /* The bit that tells whether an element comes from the same lane or the
22230 other lane is bit 4, so shift it up by 3 to the MSB position. */
22231 t5 = gen_reg_rtx (V4DImode);
22232 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22233 GEN_INT (3)));
22234 /* Clear MSB bits from the mask just in case it had them set. */
22235 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22236 /* After this t1 will have MSB set for elements from other lane. */
22237 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22238 /* Clear bits other than MSB. */
22239 emit_insn (gen_andv32qi3 (t1, t1, vt));
22240 /* Or in the lower bits from mask into t3. */
22241 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22242 /* And invert MSB bits in t1, so MSB is set for elements from the same
22243 lane. */
22244 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22245 /* Swap 128-bit lanes in t3. */
22246 t6 = gen_reg_rtx (V4DImode);
22247 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22248 const2_rtx, GEN_INT (3),
22249 const0_rtx, const1_rtx));
22250 /* And or in the lower bits from mask into t1. */
22251 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22252 if (one_operand_shuffle)
22254 /* Each of these shuffles will put 0s in places where
22255 element from the other 128-bit lane is needed, otherwise
22256 will shuffle in the requested value. */
22257 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22258 gen_lowpart (V32QImode, t6)));
22259 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22260 /* For t3 the 128-bit lanes are swapped again. */
22261 t7 = gen_reg_rtx (V4DImode);
22262 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22263 const2_rtx, GEN_INT (3),
22264 const0_rtx, const1_rtx));
22265 /* And oring both together leads to the result. */
22266 emit_insn (gen_iorv32qi3 (target, t1,
22267 gen_lowpart (V32QImode, t7)));
22268 if (target != operands[0])
22269 emit_move_insn (operands[0],
22270 gen_lowpart (GET_MODE (operands[0]), target));
22271 return;
22274 t4 = gen_reg_rtx (V32QImode);
22275 /* Similar to the one_operand_shuffle code above,
22276 just repeated twice, once for each operand. The merge_two:
22277 code will merge the two results together. */
22278 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22279 gen_lowpart (V32QImode, t6)));
22280 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22281 gen_lowpart (V32QImode, t6)));
22282 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22283 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22284 t7 = gen_reg_rtx (V4DImode);
22285 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22286 const2_rtx, GEN_INT (3),
22287 const0_rtx, const1_rtx));
22288 t8 = gen_reg_rtx (V4DImode);
22289 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22290 const2_rtx, GEN_INT (3),
22291 const0_rtx, const1_rtx));
22292 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22293 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22294 t1 = t4;
22295 t2 = t3;
22296 goto merge_two;
22298 default:
22299 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22300 break;
22304 if (TARGET_XOP)
22306 /* The XOP VPPERM insn supports three inputs. By ignoring the
22307 one_operand_shuffle special case, we avoid creating another
22308 set of constant vectors in memory. */
22309 one_operand_shuffle = false;
22311 /* mask = mask & {2*w-1, ...} */
22312 vt = GEN_INT (2*w - 1);
22314 else
22316 /* mask = mask & {w-1, ...} */
22317 vt = GEN_INT (w - 1);
22320 for (i = 0; i < w; i++)
22321 vec[i] = vt;
22322 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22323 mask = expand_simple_binop (maskmode, AND, mask, vt,
22324 NULL_RTX, 0, OPTAB_DIRECT);
22326 /* For non-QImode operations, convert the word permutation control
22327 into a byte permutation control. */
22328 if (mode != V16QImode)
22330 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22331 GEN_INT (exact_log2 (e)),
22332 NULL_RTX, 0, OPTAB_DIRECT);
22334 /* Convert mask to vector of chars. */
22335 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22337 /* Replicate each of the input bytes into byte positions:
22338 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22339 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22340 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22341 for (i = 0; i < 16; ++i)
22342 vec[i] = GEN_INT (i/e * e);
22343 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22344 vt = validize_mem (force_const_mem (V16QImode, vt));
22345 if (TARGET_XOP)
22346 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22347 else
22348 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22350 /* Convert it into the byte positions by doing
22351 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22352 for (i = 0; i < 16; ++i)
22353 vec[i] = GEN_INT (i % e);
22354 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22355 vt = validize_mem (force_const_mem (V16QImode, vt));
22356 emit_insn (gen_addv16qi3 (mask, mask, vt));
22359 /* The actual shuffle operations all operate on V16QImode. */
22360 op0 = gen_lowpart (V16QImode, op0);
22361 op1 = gen_lowpart (V16QImode, op1);
22363 if (TARGET_XOP)
22365 if (GET_MODE (target) != V16QImode)
22366 target = gen_reg_rtx (V16QImode);
22367 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22368 if (target != operands[0])
22369 emit_move_insn (operands[0],
22370 gen_lowpart (GET_MODE (operands[0]), target));
22372 else if (one_operand_shuffle)
22374 if (GET_MODE (target) != V16QImode)
22375 target = gen_reg_rtx (V16QImode);
22376 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22377 if (target != operands[0])
22378 emit_move_insn (operands[0],
22379 gen_lowpart (GET_MODE (operands[0]), target));
22381 else
22383 rtx xops[6];
22384 bool ok;
22386 /* Shuffle the two input vectors independently. */
22387 t1 = gen_reg_rtx (V16QImode);
22388 t2 = gen_reg_rtx (V16QImode);
22389 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22390 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22392 merge_two:
22393 /* Then merge them together. The key is whether any given control
22394 element contained a bit set that indicates the second word. */
22395 mask = operands[3];
22396 vt = GEN_INT (w);
22397 if (maskmode == V2DImode && !TARGET_SSE4_1)
22399 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
22400 more shuffle to convert the V2DI input mask into a V4SI
22401 input mask, at which point the masking that expand_int_vcond
22402 performs will work as desired. */
22403 rtx t3 = gen_reg_rtx (V4SImode);
22404 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22405 const0_rtx, const0_rtx,
22406 const2_rtx, const2_rtx));
22407 mask = t3;
22408 maskmode = V4SImode;
22409 e = w = 4;
22412 for (i = 0; i < w; i++)
22413 vec[i] = vt;
22414 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22415 vt = force_reg (maskmode, vt);
22416 mask = expand_simple_binop (maskmode, AND, mask, vt,
22417 NULL_RTX, 0, OPTAB_DIRECT);
22419 if (GET_MODE (target) != mode)
22420 target = gen_reg_rtx (mode);
22421 xops[0] = target;
22422 xops[1] = gen_lowpart (mode, t2);
22423 xops[2] = gen_lowpart (mode, t1);
22424 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22425 xops[4] = mask;
22426 xops[5] = vt;
22427 ok = ix86_expand_int_vcond (xops);
22428 gcc_assert (ok);
22429 if (target != operands[0])
22430 emit_move_insn (operands[0],
22431 gen_lowpart (GET_MODE (operands[0]), target));
22435 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
22436 true if we should do zero extension, else sign extension. HIGH_P is
22437 true if we want the N/2 high elements, else the low elements. */
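/* For example, on an SSE4.1 target a V8HImode source with UNSIGNED_P set
   and HIGH_P clear uses the zero-extension pattern (which corresponds to
   the pmovzxwd instruction) to widen the low four elements to SImode.  */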
22439 void
22440 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22442 machine_mode imode = GET_MODE (src);
22443 rtx tmp;
22445 if (TARGET_SSE4_1)
22447 rtx (*unpack)(rtx, rtx);
22448 rtx (*extract)(rtx, rtx) = NULL;
22449 machine_mode halfmode = BLKmode;
22451 switch (imode)
22453 case V64QImode:
22454 if (unsigned_p)
22455 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22456 else
22457 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22458 halfmode = V32QImode;
22459 extract
22460 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22461 break;
22462 case V32QImode:
22463 if (unsigned_p)
22464 unpack = gen_avx2_zero_extendv16qiv16hi2;
22465 else
22466 unpack = gen_avx2_sign_extendv16qiv16hi2;
22467 halfmode = V16QImode;
22468 extract
22469 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22470 break;
22471 case V32HImode:
22472 if (unsigned_p)
22473 unpack = gen_avx512f_zero_extendv16hiv16si2;
22474 else
22475 unpack = gen_avx512f_sign_extendv16hiv16si2;
22476 halfmode = V16HImode;
22477 extract
22478 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22479 break;
22480 case V16HImode:
22481 if (unsigned_p)
22482 unpack = gen_avx2_zero_extendv8hiv8si2;
22483 else
22484 unpack = gen_avx2_sign_extendv8hiv8si2;
22485 halfmode = V8HImode;
22486 extract
22487 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22488 break;
22489 case V16SImode:
22490 if (unsigned_p)
22491 unpack = gen_avx512f_zero_extendv8siv8di2;
22492 else
22493 unpack = gen_avx512f_sign_extendv8siv8di2;
22494 halfmode = V8SImode;
22495 extract
22496 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22497 break;
22498 case V8SImode:
22499 if (unsigned_p)
22500 unpack = gen_avx2_zero_extendv4siv4di2;
22501 else
22502 unpack = gen_avx2_sign_extendv4siv4di2;
22503 halfmode = V4SImode;
22504 extract
22505 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22506 break;
22507 case V16QImode:
22508 if (unsigned_p)
22509 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22510 else
22511 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22512 break;
22513 case V8HImode:
22514 if (unsigned_p)
22515 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22516 else
22517 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22518 break;
22519 case V4SImode:
22520 if (unsigned_p)
22521 unpack = gen_sse4_1_zero_extendv2siv2di2;
22522 else
22523 unpack = gen_sse4_1_sign_extendv2siv2di2;
22524 break;
22525 default:
22526 gcc_unreachable ();
22529 if (GET_MODE_SIZE (imode) >= 32)
22531 tmp = gen_reg_rtx (halfmode);
22532 emit_insn (extract (tmp, src));
22534 else if (high_p)
22537 /* Shift the higher 8 bytes into the lower 8 bytes. */
22537 tmp = gen_reg_rtx (V1TImode);
22538 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22539 GEN_INT (64)));
22540 tmp = gen_lowpart (imode, tmp);
22542 else
22543 tmp = src;
22545 emit_insn (unpack (dest, tmp));
22547 else
22549 rtx (*unpack)(rtx, rtx, rtx);
22551 switch (imode)
22553 case V16QImode:
22554 if (high_p)
22555 unpack = gen_vec_interleave_highv16qi;
22556 else
22557 unpack = gen_vec_interleave_lowv16qi;
22558 break;
22559 case V8HImode:
22560 if (high_p)
22561 unpack = gen_vec_interleave_highv8hi;
22562 else
22563 unpack = gen_vec_interleave_lowv8hi;
22564 break;
22565 case V4SImode:
22566 if (high_p)
22567 unpack = gen_vec_interleave_highv4si;
22568 else
22569 unpack = gen_vec_interleave_lowv4si;
22570 break;
22571 default:
22572 gcc_unreachable ();
22575 if (unsigned_p)
22576 tmp = force_reg (imode, CONST0_RTX (imode));
22577 else
22578 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22579 src, pc_rtx, pc_rtx);
22581 rtx tmp2 = gen_reg_rtx (imode);
22582 emit_insn (unpack (tmp2, src, tmp));
22583 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22587 /* Expand conditional increment or decrement using adc/sbb instructions.
22588 The default case using setcc followed by a conditional move can be
22589 done by generic code. */
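/* For example, x = (a <u b) ? x + 1 : x can be emitted as a cmp followed
   by adc $0, x, since the compare leaves the predicate in the carry flag
   (illustrative; the exact pattern depends on the mode).  */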
22590 bool
22591 ix86_expand_int_addcc (rtx operands[])
22593 enum rtx_code code = GET_CODE (operands[1]);
22594 rtx flags;
22595 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22596 rtx compare_op;
22597 rtx val = const0_rtx;
22598 bool fpcmp = false;
22599 machine_mode mode;
22600 rtx op0 = XEXP (operands[1], 0);
22601 rtx op1 = XEXP (operands[1], 1);
22603 if (operands[3] != const1_rtx
22604 && operands[3] != constm1_rtx)
22605 return false;
22606 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22607 return false;
22608 code = GET_CODE (compare_op);
22610 flags = XEXP (compare_op, 0);
22612 if (GET_MODE (flags) == CCFPmode
22613 || GET_MODE (flags) == CCFPUmode)
22615 fpcmp = true;
22616 code = ix86_fp_compare_code_to_integer (code);
22619 if (code != LTU)
22621 val = constm1_rtx;
22622 if (fpcmp)
22623 PUT_CODE (compare_op,
22624 reverse_condition_maybe_unordered
22625 (GET_CODE (compare_op)));
22626 else
22627 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22630 mode = GET_MODE (operands[0]);
22632 /* Construct either adc or sbb insn. */
22633 if ((code == LTU) == (operands[3] == constm1_rtx))
22635 switch (mode)
22637 case QImode:
22638 insn = gen_subqi3_carry;
22639 break;
22640 case HImode:
22641 insn = gen_subhi3_carry;
22642 break;
22643 case SImode:
22644 insn = gen_subsi3_carry;
22645 break;
22646 case DImode:
22647 insn = gen_subdi3_carry;
22648 break;
22649 default:
22650 gcc_unreachable ();
22653 else
22655 switch (mode)
22657 case QImode:
22658 insn = gen_addqi3_carry;
22659 break;
22660 case HImode:
22661 insn = gen_addhi3_carry;
22662 break;
22663 case SImode:
22664 insn = gen_addsi3_carry;
22665 break;
22666 case DImode:
22667 insn = gen_adddi3_carry;
22668 break;
22669 default:
22670 gcc_unreachable ();
22673 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22675 return true;
22679 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22680 but works for floating point parameters and non-offsettable memories.
22681 For pushes, it returns just stack offsets; the values will be saved
22682 in the right order. Maximally four parts are generated. */
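/* For instance, on a 32-bit target a DFmode operand yields two SImode
   parts and an XFmode operand three, while on a 64-bit target a TFmode
   operand yields two DImode parts.  */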
22684 static int
22685 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22687 int size;
22689 if (!TARGET_64BIT)
22690 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22691 else
22692 size = (GET_MODE_SIZE (mode) + 4) / 8;
22694 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22695 gcc_assert (size >= 2 && size <= 4);
22697 /* Optimize constant pool references into immediates. This is used by fp
22698 moves, which force all constants to memory to allow combining. */
22699 if (MEM_P (operand) && MEM_READONLY_P (operand))
22701 rtx tmp = maybe_get_pool_constant (operand);
22702 if (tmp)
22703 operand = tmp;
22706 if (MEM_P (operand) && !offsettable_memref_p (operand))
22709 /* The only non-offsettable memories we handle are pushes. */
22709 int ok = push_operand (operand, VOIDmode);
22711 gcc_assert (ok);
22713 operand = copy_rtx (operand);
22714 PUT_MODE (operand, word_mode);
22715 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22716 return size;
22719 if (GET_CODE (operand) == CONST_VECTOR)
22721 machine_mode imode = int_mode_for_mode (mode);
22722 /* Caution: if we looked through a constant pool memory above,
22723 the operand may actually have a different mode now. That's
22724 ok, since we want to pun this all the way back to an integer. */
22725 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22726 gcc_assert (operand != NULL);
22727 mode = imode;
22730 if (!TARGET_64BIT)
22732 if (mode == DImode)
22733 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22734 else
22736 int i;
22738 if (REG_P (operand))
22740 gcc_assert (reload_completed);
22741 for (i = 0; i < size; i++)
22742 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22744 else if (offsettable_memref_p (operand))
22746 operand = adjust_address (operand, SImode, 0);
22747 parts[0] = operand;
22748 for (i = 1; i < size; i++)
22749 parts[i] = adjust_address (operand, SImode, 4 * i);
22751 else if (GET_CODE (operand) == CONST_DOUBLE)
22753 REAL_VALUE_TYPE r;
22754 long l[4];
22756 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22757 switch (mode)
22759 case TFmode:
22760 real_to_target (l, &r, mode);
22761 parts[3] = gen_int_mode (l[3], SImode);
22762 parts[2] = gen_int_mode (l[2], SImode);
22763 break;
22764 case XFmode:
22765 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22766 long double may not be 80-bit. */
22767 real_to_target (l, &r, mode);
22768 parts[2] = gen_int_mode (l[2], SImode);
22769 break;
22770 case DFmode:
22771 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22772 break;
22773 default:
22774 gcc_unreachable ();
22776 parts[1] = gen_int_mode (l[1], SImode);
22777 parts[0] = gen_int_mode (l[0], SImode);
22779 else
22780 gcc_unreachable ();
22783 else
22785 if (mode == TImode)
22786 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22787 if (mode == XFmode || mode == TFmode)
22789 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22790 if (REG_P (operand))
22792 gcc_assert (reload_completed);
22793 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22794 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22796 else if (offsettable_memref_p (operand))
22798 operand = adjust_address (operand, DImode, 0);
22799 parts[0] = operand;
22800 parts[1] = adjust_address (operand, upper_mode, 8);
22802 else if (GET_CODE (operand) == CONST_DOUBLE)
22804 REAL_VALUE_TYPE r;
22805 long l[4];
22807 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22808 real_to_target (l, &r, mode);
22810 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22811 if (HOST_BITS_PER_WIDE_INT >= 64)
22812 parts[0]
22813 = gen_int_mode
22814 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22815 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22816 DImode);
22817 else
22818 parts[0] = immed_double_const (l[0], l[1], DImode);
22820 if (upper_mode == SImode)
22821 parts[1] = gen_int_mode (l[2], SImode);
22822 else if (HOST_BITS_PER_WIDE_INT >= 64)
22823 parts[1]
22824 = gen_int_mode
22825 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22826 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22827 DImode);
22828 else
22829 parts[1] = immed_double_const (l[2], l[3], DImode);
22831 else
22832 gcc_unreachable ();
22836 return size;
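/* Illustrative sketch, not part of the port: on a 32-bit little-endian
   host whose double format matches the target's DFmode, splitting a
   DFmode constant into two SImode parts amounts to reinterpreting the
   double as two 32-bit words, low word first.  The helper name is made
   up for illustration only.  */
#if 0
static void
split_dfmode_model (double x, unsigned int part[2])
{
  unsigned long long bits;
  memcpy (&bits, &x, sizeof bits);
  part[0] = (unsigned int) bits;	   /* low SImode part */
  part[1] = (unsigned int) (bits >> 32);   /* high SImode part */
}
#endif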
22839 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22840 All required insns are emitted directly. The value is split into
22841 half-mode parts, placed into operands 2 and up (destination) and 6 and
22842 up (source), and moved in an order that avoids clobbering. */
22844 void
22845 ix86_split_long_move (rtx operands[])
22847 rtx part[2][4];
22848 int nparts, i, j;
22849 int push = 0;
22850 int collisions = 0;
22851 machine_mode mode = GET_MODE (operands[0]);
22852 bool collisionparts[4];
22854 /* The DFmode expanders may ask us to move a double.
22855 For a 64-bit target this is a single move. By hiding that fact
22856 here we simplify the i386.md splitters. */
22857 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22859 /* Optimize constant pool reference to immediates. This is used by
22860 fp moves, that force all constants to memory to allow combining. */
22862 if (MEM_P (operands[1])
22863 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22864 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22865 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22866 if (push_operand (operands[0], VOIDmode))
22868 operands[0] = copy_rtx (operands[0]);
22869 PUT_MODE (operands[0], word_mode);
22871 else
22872 operands[0] = gen_lowpart (DImode, operands[0]);
22873 operands[1] = gen_lowpart (DImode, operands[1]);
22874 emit_move_insn (operands[0], operands[1]);
22875 return;
22878 /* The only non-offsettable memory we handle is a push. */
22879 if (push_operand (operands[0], VOIDmode))
22880 push = 1;
22881 else
22882 gcc_assert (!MEM_P (operands[0])
22883 || offsettable_memref_p (operands[0]));
22885 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22886 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22888 /* When emitting push, take care for source operands on the stack. */
22889 if (push && MEM_P (operands[1])
22890 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22892 rtx src_base = XEXP (part[1][nparts - 1], 0);
22894 /* Compensate for the stack decrement by 4. */
22895 if (!TARGET_64BIT && nparts == 3
22896 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22897 src_base = plus_constant (Pmode, src_base, 4);
22899 /* src_base refers to the stack pointer and is
22900 automatically decreased by emitted push. */
22901 for (i = 0; i < nparts; i++)
22902 part[1][i] = change_address (part[1][i],
22903 GET_MODE (part[1][i]), src_base);
22906 /* We need to do copy in the right order in case an address register
22907 of the source overlaps the destination. */
22908 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22910 rtx tmp;
22912 for (i = 0; i < nparts; i++)
22914 collisionparts[i]
22915 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22916 if (collisionparts[i])
22917 collisions++;
22920 /* Collision in the middle part can be handled by reordering. */
22921 if (collisions == 1 && nparts == 3 && collisionparts [1])
22923 std::swap (part[0][1], part[0][2]);
22924 std::swap (part[1][1], part[1][2]);
22926 else if (collisions == 1
22927 && nparts == 4
22928 && (collisionparts [1] || collisionparts [2]))
22930 if (collisionparts [1])
22932 std::swap (part[0][1], part[0][2]);
22933 std::swap (part[1][1], part[1][2]);
22935 else
22937 std::swap (part[0][2], part[0][3]);
22938 std::swap (part[1][2], part[1][3]);
22942 /* If there are more collisions, we can't handle it by reordering.
22943 Do an lea to the last part and use only one colliding move. */
22944 else if (collisions > 1)
22946 rtx base;
22948 collisions = 1;
22950 base = part[0][nparts - 1];
22952 /* Handle the case when the last part isn't valid for lea.
22953 Happens in 64-bit mode storing the 12-byte XFmode. */
22954 if (GET_MODE (base) != Pmode)
22955 base = gen_rtx_REG (Pmode, REGNO (base));
22957 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22958 part[1][0] = replace_equiv_address (part[1][0], base);
22959 for (i = 1; i < nparts; i++)
22961 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22962 part[1][i] = replace_equiv_address (part[1][i], tmp);
22967 if (push)
22969 if (!TARGET_64BIT)
22971 if (nparts == 3)
22973 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22974 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22975 stack_pointer_rtx, GEN_INT (-4)));
22976 emit_move_insn (part[0][2], part[1][2]);
22978 else if (nparts == 4)
22980 emit_move_insn (part[0][3], part[1][3]);
22981 emit_move_insn (part[0][2], part[1][2]);
22984 else
22986 /* In 64-bit mode we don't have a 32-bit push available. If the operand
22987 is a register, that is OK - we just use the larger counterpart. We also
22988 retype memory - these cases come from the attempt to avoid a REX prefix
22989 on moves of the second half of a TFmode value. */
22990 if (GET_MODE (part[1][1]) == SImode)
22992 switch (GET_CODE (part[1][1]))
22994 case MEM:
22995 part[1][1] = adjust_address (part[1][1], DImode, 0);
22996 break;
22998 case REG:
22999 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
23000 break;
23002 default:
23003 gcc_unreachable ();
23006 if (GET_MODE (part[1][0]) == SImode)
23007 part[1][0] = part[1][1];
23010 emit_move_insn (part[0][1], part[1][1]);
23011 emit_move_insn (part[0][0], part[1][0]);
23012 return;
23015 /* Choose correct order to not overwrite the source before it is copied. */
23016 if ((REG_P (part[0][0])
23017 && REG_P (part[1][1])
23018 && (REGNO (part[0][0]) == REGNO (part[1][1])
23019 || (nparts == 3
23020 && REGNO (part[0][0]) == REGNO (part[1][2]))
23021 || (nparts == 4
23022 && REGNO (part[0][0]) == REGNO (part[1][3]))))
23023 || (collisions > 0
23024 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
23026 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
23028 operands[2 + i] = part[0][j];
23029 operands[6 + i] = part[1][j];
23032 else
23034 for (i = 0; i < nparts; i++)
23036 operands[2 + i] = part[0][i];
23037 operands[6 + i] = part[1][i];
23041 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
23042 if (optimize_insn_for_size_p ())
23044 for (j = 0; j < nparts - 1; j++)
23045 if (CONST_INT_P (operands[6 + j])
23046 && operands[6 + j] != const0_rtx
23047 && REG_P (operands[2 + j]))
23048 for (i = j; i < nparts - 1; i++)
23049 if (CONST_INT_P (operands[7 + i])
23050 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
23051 operands[7 + i] = operands[2 + j];
23054 for (i = 0; i < nparts; i++)
23055 emit_move_insn (operands[2 + i], operands[6 + i]);
23057 return;
23060 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
23061 left shift by a constant, either using a single shift or
23062 a sequence of add instructions. */
23064 static void
23065 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
23067 rtx (*insn)(rtx, rtx, rtx);
23069 if (count == 1
23070 || (count * ix86_cost->add <= ix86_cost->shift_const
23071 && !optimize_insn_for_size_p ()))
23073 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
23074 while (count-- > 0)
23075 emit_insn (insn (operand, operand, operand));
23077 else
23079 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23080 emit_insn (insn (operand, operand, GEN_INT (count)));
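/* Illustrative sketch, not part of the port: the add-based path above uses
   the fact that "add reg, reg" doubles a value, i.e. shifts it left by one.
   The function name is made up for illustration only.  */
#if 0
static unsigned int
ashl_by_adds_model (unsigned int x, int count)
{
  while (count-- > 0)
    x += x;	/* one "add reg, reg" per shift step */
  return x;
}
#endif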
23084 void
23085 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
23087 rtx (*gen_ashl3)(rtx, rtx, rtx);
23088 rtx (*gen_shld)(rtx, rtx, rtx);
23089 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23091 rtx low[2], high[2];
23092 int count;
23094 if (CONST_INT_P (operands[2]))
23096 split_double_mode (mode, operands, 2, low, high);
23097 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23099 if (count >= half_width)
23101 emit_move_insn (high[0], low[1]);
23102 emit_move_insn (low[0], const0_rtx);
23104 if (count > half_width)
23105 ix86_expand_ashl_const (high[0], count - half_width, mode);
23107 else
23109 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23111 if (!rtx_equal_p (operands[0], operands[1]))
23112 emit_move_insn (operands[0], operands[1]);
23114 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
23115 ix86_expand_ashl_const (low[0], count, mode);
23117 return;
23120 split_double_mode (mode, operands, 1, low, high);
23122 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23124 if (operands[1] == const1_rtx)
23126 /* Assuming we've chosen QImode-capable registers, 1 << N
23127 can be done with two 32/64-bit shifts, no branches, no cmoves. */
23128 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23130 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23132 ix86_expand_clear (low[0]);
23133 ix86_expand_clear (high[0]);
23134 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23136 d = gen_lowpart (QImode, low[0]);
23137 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23138 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23139 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23141 d = gen_lowpart (QImode, high[0]);
23142 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23143 s = gen_rtx_NE (QImode, flags, const0_rtx);
23144 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23147 /* Otherwise, we can get the same results by manually performing
23148 a bit extract operation on bit 5/6, and then performing the two
23149 shifts. The two methods of getting 0/1 into low/high are exactly
23150 the same size. Avoiding the shift in the bit extract case helps
23151 pentium4 a bit; no one else seems to care much either way. */
23152 else
23154 machine_mode half_mode;
23155 rtx (*gen_lshr3)(rtx, rtx, rtx);
23156 rtx (*gen_and3)(rtx, rtx, rtx);
23157 rtx (*gen_xor3)(rtx, rtx, rtx);
23158 HOST_WIDE_INT bits;
23159 rtx x;
23161 if (mode == DImode)
23163 half_mode = SImode;
23164 gen_lshr3 = gen_lshrsi3;
23165 gen_and3 = gen_andsi3;
23166 gen_xor3 = gen_xorsi3;
23167 bits = 5;
23169 else
23171 half_mode = DImode;
23172 gen_lshr3 = gen_lshrdi3;
23173 gen_and3 = gen_anddi3;
23174 gen_xor3 = gen_xordi3;
23175 bits = 6;
23178 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23179 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23180 else
23181 x = gen_lowpart (half_mode, operands[2]);
23182 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
23184 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23185 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23186 emit_move_insn (low[0], high[0]);
23187 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23190 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23191 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23192 return;
23195 if (operands[1] == constm1_rtx)
23197 /* For -1 << N, we can avoid the shld instruction, because we
23198 know that we're shifting 0...31/63 ones into a -1. */
23199 emit_move_insn (low[0], constm1_rtx);
23200 if (optimize_insn_for_size_p ())
23201 emit_move_insn (high[0], low[0]);
23202 else
23203 emit_move_insn (high[0], constm1_rtx);
23205 else
23207 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23209 if (!rtx_equal_p (operands[0], operands[1]))
23210 emit_move_insn (operands[0], operands[1]);
23212 split_double_mode (mode, operands, 1, low, high);
23213 emit_insn (gen_shld (high[0], low[0], operands[2]));
23216 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23218 if (TARGET_CMOVE && scratch)
23220 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23221 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23223 ix86_expand_clear (scratch);
23224 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23226 else
23228 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23229 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23231 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
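/* Illustrative sketch, not part of the port: for a non-constant count the
   code above pairs SHLD with a plain shift and then fixes up the case where
   the count reaches the upper half (x86_shiftsi/di_adj_1 or _adj_2).  A
   plain C model of the DImode-on-32-bit case; the name is made up for
   illustration only.  */
#if 0
static void
shld_pair_model (unsigned int *lo, unsigned int *hi, unsigned int count)
{
  unsigned int c = count & 31;	/* hardware shifts use the count mod 32 */
  if (c)
    {
      *hi = (*hi << c) | (*lo >> (32 - c));	/* shld %cl, lo, hi */
      *lo <<= c;				/* shl %cl, lo */
    }
  if (count & 32)		/* the adjustment step for count >= 32 */
    {
      *hi = *lo;
      *lo = 0;
    }
}
#endif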
23235 void
23236 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23238 rtx (*gen_ashr3)(rtx, rtx, rtx)
23239 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23240 rtx (*gen_shrd)(rtx, rtx, rtx);
23241 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23243 rtx low[2], high[2];
23244 int count;
23246 if (CONST_INT_P (operands[2]))
23248 split_double_mode (mode, operands, 2, low, high);
23249 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23251 if (count == GET_MODE_BITSIZE (mode) - 1)
23253 emit_move_insn (high[0], high[1]);
23254 emit_insn (gen_ashr3 (high[0], high[0],
23255 GEN_INT (half_width - 1)));
23256 emit_move_insn (low[0], high[0]);
23259 else if (count >= half_width)
23261 emit_move_insn (low[0], high[1]);
23262 emit_move_insn (high[0], low[0]);
23263 emit_insn (gen_ashr3 (high[0], high[0],
23264 GEN_INT (half_width - 1)));
23266 if (count > half_width)
23267 emit_insn (gen_ashr3 (low[0], low[0],
23268 GEN_INT (count - half_width)));
23270 else
23272 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23274 if (!rtx_equal_p (operands[0], operands[1]))
23275 emit_move_insn (operands[0], operands[1]);
23277 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23278 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23281 else
23283 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23285 if (!rtx_equal_p (operands[0], operands[1]))
23286 emit_move_insn (operands[0], operands[1]);
23288 split_double_mode (mode, operands, 1, low, high);
23290 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23291 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23293 if (TARGET_CMOVE && scratch)
23295 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23296 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23298 emit_move_insn (scratch, high[0]);
23299 emit_insn (gen_ashr3 (scratch, scratch,
23300 GEN_INT (half_width - 1)));
23301 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23302 scratch));
23304 else
23306 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23307 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23309 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
23314 void
23315 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23317 rtx (*gen_lshr3)(rtx, rtx, rtx)
23318 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23319 rtx (*gen_shrd)(rtx, rtx, rtx);
23320 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23322 rtx low[2], high[2];
23323 int count;
23325 if (CONST_INT_P (operands[2]))
23327 split_double_mode (mode, operands, 2, low, high);
23328 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23330 if (count >= half_width)
23332 emit_move_insn (low[0], high[1]);
23333 ix86_expand_clear (high[0]);
23335 if (count > half_width)
23336 emit_insn (gen_lshr3 (low[0], low[0],
23337 GEN_INT (count - half_width)));
23339 else
23341 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23343 if (!rtx_equal_p (operands[0], operands[1]))
23344 emit_move_insn (operands[0], operands[1]);
23346 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23347 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23350 else
23352 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23354 if (!rtx_equal_p (operands[0], operands[1]))
23355 emit_move_insn (operands[0], operands[1]);
23357 split_double_mode (mode, operands, 1, low, high);
23359 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23360 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23362 if (TARGET_CMOVE && scratch)
23364 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23365 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23367 ix86_expand_clear (scratch);
23368 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23369 scratch));
23371 else
23373 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23374 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23376 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23381 /* Predict just emitted jump instruction to be taken with probability PROB. */
23382 static void
23383 predict_jump (int prob)
23385 rtx insn = get_last_insn ();
23386 gcc_assert (JUMP_P (insn));
23387 add_int_reg_note (insn, REG_BR_PROB, prob);
23390 /* Helper function for the string operations below. Test whether the VALUE
23391 bit of VARIABLE is clear; if so (the aligned case), jump to the label. */
23392 static rtx_code_label *
23393 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23395 rtx_code_label *label = gen_label_rtx ();
23396 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23397 if (GET_MODE (variable) == DImode)
23398 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23399 else
23400 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23401 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23402 1, label);
23403 if (epilogue)
23404 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23405 else
23406 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23407 return label;
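/* Illustrative sketch, not part of the port: the returned label is reached
   when the tested bit of VARIABLE is clear, so the emitted RTL behaves like
   the C below.  The name is made up for illustration only.  */
#if 0
static int
aligntest_model (unsigned long variable, int value)
{
  /* test $value, variable ; jz <returned label>  */
  return (variable & value) == 0;
}
#endif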
23410 /* Decrease COUNTREG by VALUE. */
23411 static void
23412 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23414 rtx (*gen_add)(rtx, rtx, rtx)
23415 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23417 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23420 /* Zero-extend EXP, which may be SImode, to a Pmode register. */
23422 ix86_zero_extend_to_Pmode (rtx exp)
23424 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23427 /* Divide COUNTREG by SCALE. */
23428 static rtx
23429 scale_counter (rtx countreg, int scale)
23431 rtx sc;
23433 if (scale == 1)
23434 return countreg;
23435 if (CONST_INT_P (countreg))
23436 return GEN_INT (INTVAL (countreg) / scale);
23437 gcc_assert (REG_P (countreg));
23439 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23440 GEN_INT (exact_log2 (scale)),
23441 NULL, 1, OPTAB_DIRECT);
23442 return sc;
23445 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23446 DImode for constant loop counts. */
23448 static machine_mode
23449 counter_mode (rtx count_exp)
23451 if (GET_MODE (count_exp) != VOIDmode)
23452 return GET_MODE (count_exp);
23453 if (!CONST_INT_P (count_exp))
23454 return Pmode;
23455 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23456 return DImode;
23457 return SImode;
23460 /* Copy the address to a Pmode register. This is used for x32 to
23461 truncate DImode TLS address to a SImode register. */
23463 static rtx
23464 ix86_copy_addr_to_reg (rtx addr)
23466 rtx reg;
23467 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23469 reg = copy_addr_to_reg (addr);
23470 REG_POINTER (reg) = 1;
23471 return reg;
23473 else
23475 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23476 reg = copy_to_mode_reg (DImode, addr);
23477 REG_POINTER (reg) = 1;
23478 return gen_rtx_SUBREG (SImode, reg, 0);
23482 /* When ISSETMEM is FALSE, output a simple loop to move memory from SRCPTR
23483 to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall size is
23484 COUNT, specified in bytes. When ISSETMEM is TRUE, output the equivalent
23485 loop to set memory to VALUE (supposed to be in MODE).
23487 The size is rounded down to a whole number of chunks moved at once.
23488 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
23491 static void
23492 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23493 rtx destptr, rtx srcptr, rtx value,
23494 rtx count, machine_mode mode, int unroll,
23495 int expected_size, bool issetmem)
23497 rtx_code_label *out_label, *top_label;
23498 rtx iter, tmp;
23499 machine_mode iter_mode = counter_mode (count);
23500 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23501 rtx piece_size = GEN_INT (piece_size_n);
23502 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23503 rtx size;
23504 int i;
23506 top_label = gen_label_rtx ();
23507 out_label = gen_label_rtx ();
23508 iter = gen_reg_rtx (iter_mode);
23510 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23511 NULL, 1, OPTAB_DIRECT);
23512 /* Those two should combine. */
23513 if (piece_size == const1_rtx)
23515 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23516 true, out_label);
23517 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23519 emit_move_insn (iter, const0_rtx);
23521 emit_label (top_label);
23523 tmp = convert_modes (Pmode, iter_mode, iter, true);
23525 /* This assert could be relaxed - in that case we'll need to compute
23526 the smallest power of two containing PIECE_SIZE_N and pass it to
23527 offset_address. */
23528 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23529 destmem = offset_address (destmem, tmp, piece_size_n);
23530 destmem = adjust_address (destmem, mode, 0);
23532 if (!issetmem)
23534 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23535 srcmem = adjust_address (srcmem, mode, 0);
23537 /* When unrolling for chips that reorder memory reads and writes,
23538 we can save registers by using single temporary.
23539 Also using 4 temporaries is overkill in 32bit mode. */
23540 if (!TARGET_64BIT && 0)
23542 for (i = 0; i < unroll; i++)
23544 if (i)
23546 destmem =
23547 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23548 srcmem =
23549 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23551 emit_move_insn (destmem, srcmem);
23554 else
23556 rtx tmpreg[4];
23557 gcc_assert (unroll <= 4);
23558 for (i = 0; i < unroll; i++)
23560 tmpreg[i] = gen_reg_rtx (mode);
23561 if (i)
23563 srcmem =
23564 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23566 emit_move_insn (tmpreg[i], srcmem);
23568 for (i = 0; i < unroll; i++)
23570 if (i)
23572 destmem =
23573 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23575 emit_move_insn (destmem, tmpreg[i]);
23579 else
23580 for (i = 0; i < unroll; i++)
23582 if (i)
23583 destmem =
23584 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23585 emit_move_insn (destmem, value);
23588 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23589 true, OPTAB_LIB_WIDEN);
23590 if (tmp != iter)
23591 emit_move_insn (iter, tmp);
23593 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23594 true, top_label);
23595 if (expected_size != -1)
23597 expected_size /= GET_MODE_SIZE (mode) * unroll;
23598 if (expected_size == 0)
23599 predict_jump (0);
23600 else if (expected_size > REG_BR_PROB_BASE)
23601 predict_jump (REG_BR_PROB_BASE - 1);
23602 else
23603 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23605 else
23606 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23607 iter = ix86_zero_extend_to_Pmode (iter);
23608 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23609 true, OPTAB_LIB_WIDEN);
23610 if (tmp != destptr)
23611 emit_move_insn (destptr, tmp);
23612 if (!issetmem)
23614 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23615 true, OPTAB_LIB_WIDEN);
23616 if (tmp != srcptr)
23617 emit_move_insn (srcptr, tmp);
23619 emit_label (out_label);
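/* Illustrative sketch, not part of the port: with MODE == SImode and
   UNROLL == 4 the loop emitted above behaves like the plain C below, and
   the trailing COUNT & 15 bytes are left to the epilogue.  The function
   name is made up for illustration only.  */
#if 0
static void
copy_loop_model (char *dst, const char *src, unsigned long count)
{
  unsigned long i, rounded = count & ~(unsigned long) 15;
  for (i = 0; i < rounded; i += 16)
    memcpy (dst + i, src + i, 16);	/* four SImode moves per iteration */
}
#endif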
23622 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23623 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23624 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23625 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23626 ORIG_VALUE is the original value passed to memset to fill the memory with.
23627 Other arguments have same meaning as for previous function. */
23629 static void
23630 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23631 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23632 rtx count,
23633 machine_mode mode, bool issetmem)
23635 rtx destexp;
23636 rtx srcexp;
23637 rtx countreg;
23638 HOST_WIDE_INT rounded_count;
23640 /* If possible, it is shorter to use rep movs.
23641 TODO: Maybe it is better to move this logic to decide_alg. */
23642 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23643 && (!issetmem || orig_value == const0_rtx))
23644 mode = SImode;
23646 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23647 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23649 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23650 GET_MODE_SIZE (mode)));
23651 if (mode != QImode)
23653 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23654 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23655 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23657 else
23658 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23659 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23661 rounded_count = (INTVAL (count)
23662 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23663 destmem = shallow_copy_rtx (destmem);
23664 set_mem_size (destmem, rounded_count);
23666 else if (MEM_SIZE_KNOWN_P (destmem))
23667 clear_mem_size (destmem);
23669 if (issetmem)
23671 value = force_reg (mode, gen_lowpart (mode, value));
23672 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23674 else
23676 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23677 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23678 if (mode != QImode)
23680 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23681 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23682 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23684 else
23685 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23686 if (CONST_INT_P (count))
23688 rounded_count = (INTVAL (count)
23689 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23690 srcmem = shallow_copy_rtx (srcmem);
23691 set_mem_size (srcmem, rounded_count);
23693 else
23695 if (MEM_SIZE_KNOWN_P (srcmem))
23696 clear_mem_size (srcmem);
23698 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23699 destexp, srcexp));
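/* Illustrative sketch, not part of the port: for a doubleword "rep movs"
   the count register holds COUNT / 4 (see scale_counter) and only
   COUNT & ~3 bytes are copied; the remainder is handled by the epilogue
   code.  The function name is made up for illustration only.  */
#if 0
static void
rep_movsl_model (unsigned int *dst, const unsigned int *src,
		 unsigned long count_bytes)
{
  unsigned long n = count_bytes / 4;	/* scaled count */
  while (n--)
    *dst++ = *src++;			/* one doubleword per iteration */
}
#endif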
23703 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23704 DESTMEM.
23705 SRCMEM is passed by pointer so it can be updated on return.
23706 Return value is the updated DESTMEM. */
23707 static rtx
23708 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23709 HOST_WIDE_INT size_to_move)
23711 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23712 enum insn_code code;
23713 machine_mode move_mode;
23714 int piece_size, i;
23716 /* Find the widest mode in which we could perform moves.
23717 Start with the biggest power of 2 less than SIZE_TO_MOVE and halve
23718 it until a move of that size is supported. */
23719 piece_size = 1 << floor_log2 (size_to_move);
23720 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23721 code = optab_handler (mov_optab, move_mode);
23722 while (code == CODE_FOR_nothing && piece_size > 1)
23724 piece_size >>= 1;
23725 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23726 code = optab_handler (mov_optab, move_mode);
23729 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23730 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23731 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23733 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23734 move_mode = mode_for_vector (word_mode, nunits);
23735 code = optab_handler (mov_optab, move_mode);
23736 if (code == CODE_FOR_nothing)
23738 move_mode = word_mode;
23739 piece_size = GET_MODE_SIZE (move_mode);
23740 code = optab_handler (mov_optab, move_mode);
23743 gcc_assert (code != CODE_FOR_nothing);
23745 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23746 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23748 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23749 gcc_assert (size_to_move % piece_size == 0);
23750 adjust = GEN_INT (piece_size);
23751 for (i = 0; i < size_to_move; i += piece_size)
23753 /* We move from memory to memory, so we'll need to do it via
23754 a temporary register. */
23755 tempreg = gen_reg_rtx (move_mode);
23756 emit_insn (GEN_FCN (code) (tempreg, src));
23757 emit_insn (GEN_FCN (code) (dst, tempreg));
23759 emit_move_insn (destptr,
23760 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23761 emit_move_insn (srcptr,
23762 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23764 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23765 piece_size);
23766 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23767 piece_size);
23770 /* Update DST and SRC rtx. */
23771 *srcmem = src;
23772 return dst;
23775 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23776 static void
23777 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23778 rtx destptr, rtx srcptr, rtx count, int max_size)
23780 rtx src, dest;
23781 if (CONST_INT_P (count))
23783 HOST_WIDE_INT countval = INTVAL (count);
23784 HOST_WIDE_INT epilogue_size = countval % max_size;
23785 int i;
23787 /* For now MAX_SIZE should be a power of 2. This assert could be
23788 relaxed, but it'll require a bit more complicated epilogue
23789 expanding. */
23790 gcc_assert ((max_size & (max_size - 1)) == 0);
23791 for (i = max_size; i >= 1; i >>= 1)
23793 if (epilogue_size & i)
23794 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23796 return;
23798 if (max_size > 8)
23800 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23801 count, 1, OPTAB_DIRECT);
23802 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23803 count, QImode, 1, 4, false);
23804 return;
23807 /* When single-instruction stringops are available, we can cheaply advance
23808 the dest and src pointers. Otherwise we save code size by maintaining an
23809 offset (zero is readily available from the preceding rep operation) and using x86 addressing modes.
23811 if (TARGET_SINGLE_STRINGOP)
23813 if (max_size > 4)
23815 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23816 src = change_address (srcmem, SImode, srcptr);
23817 dest = change_address (destmem, SImode, destptr);
23818 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23819 emit_label (label);
23820 LABEL_NUSES (label) = 1;
23822 if (max_size > 2)
23824 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23825 src = change_address (srcmem, HImode, srcptr);
23826 dest = change_address (destmem, HImode, destptr);
23827 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23828 emit_label (label);
23829 LABEL_NUSES (label) = 1;
23831 if (max_size > 1)
23833 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23834 src = change_address (srcmem, QImode, srcptr);
23835 dest = change_address (destmem, QImode, destptr);
23836 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23837 emit_label (label);
23838 LABEL_NUSES (label) = 1;
23841 else
23843 rtx offset = force_reg (Pmode, const0_rtx);
23844 rtx tmp;
23846 if (max_size > 4)
23848 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23849 src = change_address (srcmem, SImode, srcptr);
23850 dest = change_address (destmem, SImode, destptr);
23851 emit_move_insn (dest, src);
23852 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23853 true, OPTAB_LIB_WIDEN);
23854 if (tmp != offset)
23855 emit_move_insn (offset, tmp);
23856 emit_label (label);
23857 LABEL_NUSES (label) = 1;
23859 if (max_size > 2)
23861 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23862 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23863 src = change_address (srcmem, HImode, tmp);
23864 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23865 dest = change_address (destmem, HImode, tmp);
23866 emit_move_insn (dest, src);
23867 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23868 true, OPTAB_LIB_WIDEN);
23869 if (tmp != offset)
23870 emit_move_insn (offset, tmp);
23871 emit_label (label);
23872 LABEL_NUSES (label) = 1;
23874 if (max_size > 1)
23876 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23877 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23878 src = change_address (srcmem, QImode, tmp);
23879 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23880 dest = change_address (destmem, QImode, tmp);
23881 emit_move_insn (dest, src);
23882 emit_label (label);
23883 LABEL_NUSES (label) = 1;
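/* Illustrative sketch, not part of the port: for a constant COUNT the
   epilogue emitted above copies one piece per set bit of COUNT % MAX_SIZE,
   largest piece first.  The function name is made up for illustration
   only.  */
#if 0
static void
movmem_epilogue_model (char *dst, const char *src, unsigned long count,
		       int max_size)
{
  unsigned long rem = count % max_size;	/* MAX_SIZE is a power of 2 */
  int i;
  for (i = max_size; i >= 1; i >>= 1)
    if (rem & i)
      {
	memcpy (dst, src, i);
	dst += i;
	src += i;
      }
}
#endif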
23888 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23889 with value PROMOTED_VAL.
23890 Unlike emit_memmov there is no source operand to update.
23891 Return value is the updated DESTMEM. */
23892 static rtx
23893 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23894 HOST_WIDE_INT size_to_move)
23896 rtx dst = destmem, adjust;
23897 enum insn_code code;
23898 machine_mode move_mode;
23899 int piece_size, i;
23901 /* Find the widest mode in which we could perform moves.
23902 Start from the mode of PROMOTED_VAL and narrow it if SIZE_TO_MOVE
23903 is smaller than that mode's size. */
23904 move_mode = GET_MODE (promoted_val);
23905 if (move_mode == VOIDmode)
23906 move_mode = QImode;
23907 if (size_to_move < GET_MODE_SIZE (move_mode))
23909 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23910 promoted_val = gen_lowpart (move_mode, promoted_val);
23912 piece_size = GET_MODE_SIZE (move_mode);
23913 code = optab_handler (mov_optab, move_mode);
23914 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23916 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23918 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23919 gcc_assert (size_to_move % piece_size == 0);
23920 adjust = GEN_INT (piece_size);
23921 for (i = 0; i < size_to_move; i += piece_size)
23923 if (piece_size <= GET_MODE_SIZE (word_mode))
23925 emit_insn (gen_strset (destptr, dst, promoted_val));
23926 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23927 piece_size);
23928 continue;
23931 emit_insn (GEN_FCN (code) (dst, promoted_val));
23933 emit_move_insn (destptr,
23934 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23936 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23937 piece_size);
23940 /* Update DST rtx. */
23941 return dst;
23943 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23944 static void
23945 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23946 rtx count, int max_size)
23948 count =
23949 expand_simple_binop (counter_mode (count), AND, count,
23950 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23951 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23952 gen_lowpart (QImode, value), count, QImode,
23953 1, max_size / 2, true);
23956 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23957 static void
23958 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23959 rtx count, int max_size)
23961 rtx dest;
23963 if (CONST_INT_P (count))
23965 HOST_WIDE_INT countval = INTVAL (count);
23966 HOST_WIDE_INT epilogue_size = countval % max_size;
23967 int i;
23969 /* For now MAX_SIZE should be a power of 2. This assert could be
23970 relaxed, but it'll require a bit more complicated epilogue
23971 expanding. */
23972 gcc_assert ((max_size & (max_size - 1)) == 0);
23973 for (i = max_size; i >= 1; i >>= 1)
23975 if (epilogue_size & i)
23977 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23978 destmem = emit_memset (destmem, destptr, vec_value, i);
23979 else
23980 destmem = emit_memset (destmem, destptr, value, i);
23983 return;
23985 if (max_size > 32)
23987 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23988 return;
23990 if (max_size > 16)
23992 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23993 if (TARGET_64BIT)
23995 dest = change_address (destmem, DImode, destptr);
23996 emit_insn (gen_strset (destptr, dest, value));
23997 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23998 emit_insn (gen_strset (destptr, dest, value));
24000 else
24002 dest = change_address (destmem, SImode, destptr);
24003 emit_insn (gen_strset (destptr, dest, value));
24004 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24005 emit_insn (gen_strset (destptr, dest, value));
24006 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
24007 emit_insn (gen_strset (destptr, dest, value));
24008 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
24009 emit_insn (gen_strset (destptr, dest, value));
24011 emit_label (label);
24012 LABEL_NUSES (label) = 1;
24014 if (max_size > 8)
24016 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
24017 if (TARGET_64BIT)
24019 dest = change_address (destmem, DImode, destptr);
24020 emit_insn (gen_strset (destptr, dest, value));
24022 else
24024 dest = change_address (destmem, SImode, destptr);
24025 emit_insn (gen_strset (destptr, dest, value));
24026 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24027 emit_insn (gen_strset (destptr, dest, value));
24029 emit_label (label);
24030 LABEL_NUSES (label) = 1;
24032 if (max_size > 4)
24034 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
24035 dest = change_address (destmem, SImode, destptr);
24036 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
24037 emit_label (label);
24038 LABEL_NUSES (label) = 1;
24040 if (max_size > 2)
24042 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
24043 dest = change_address (destmem, HImode, destptr);
24044 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
24045 emit_label (label);
24046 LABEL_NUSES (label) = 1;
24048 if (max_size > 1)
24050 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
24051 dest = change_address (destmem, QImode, destptr);
24052 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
24053 emit_label (label);
24054 LABEL_NUSES (label) = 1;
24058 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM, or store
24059 enough into DESTMEM, to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
24060 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
24061 ignored.
24062 Return value is updated DESTMEM. */
24063 static rtx
24064 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
24065 rtx destptr, rtx srcptr, rtx value,
24066 rtx vec_value, rtx count, int align,
24067 int desired_alignment, bool issetmem)
24069 int i;
24070 for (i = 1; i < desired_alignment; i <<= 1)
24072 if (align <= i)
24074 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
24075 if (issetmem)
24077 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24078 destmem = emit_memset (destmem, destptr, vec_value, i);
24079 else
24080 destmem = emit_memset (destmem, destptr, value, i);
24082 else
24083 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
24084 ix86_adjust_counter (count, i);
24085 emit_label (label);
24086 LABEL_NUSES (label) = 1;
24087 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
24090 return destmem;
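/* Illustrative sketch, not part of the port: for each power of two below
   DESIRED_ALIGNMENT that the incoming alignment does not already
   guarantee, the prologue above tests the corresponding destination
   address bit at run time and, when it is set, stores (or copies) that
   many bytes.  A plain C model of the memset flavour; the names are made
   up for illustration only.  */
#if 0
static char *
align_prologue_model (char *dst, unsigned long *count, int value,
		      int desired_align)
{
  int i;
  for (i = 1; i < desired_align; i <<= 1)
    if ((unsigned long) dst & i)
      {
	memset (dst, value, i);
	dst += i;
	*count -= i;
      }
  return dst;
}
#endif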
24093 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
24094 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
24095 and jump to DONE_LABEL. */
24096 static void
24097 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
24098 rtx destptr, rtx srcptr,
24099 rtx value, rtx vec_value,
24100 rtx count, int size,
24101 rtx done_label, bool issetmem)
24103 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
24104 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
24105 rtx modesize;
24106 int n;
24108 /* If we do not have vector value to copy, we must reduce size. */
24109 if (issetmem)
24111 if (!vec_value)
24113 if (GET_MODE (value) == VOIDmode && size > 8)
24114 mode = Pmode;
24115 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
24116 mode = GET_MODE (value);
24118 else
24119 mode = GET_MODE (vec_value), value = vec_value;
24121 else
24123 /* Choose appropriate vector mode. */
24124 if (size >= 32)
24125 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24126 else if (size >= 16)
24127 mode = TARGET_SSE ? V16QImode : DImode;
24128 srcmem = change_address (srcmem, mode, srcptr);
24130 destmem = change_address (destmem, mode, destptr);
24131 modesize = GEN_INT (GET_MODE_SIZE (mode));
24132 gcc_assert (GET_MODE_SIZE (mode) <= size);
24133 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24135 if (issetmem)
24136 emit_move_insn (destmem, gen_lowpart (mode, value));
24137 else
24139 emit_move_insn (destmem, srcmem);
24140 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24142 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24145 destmem = offset_address (destmem, count, 1);
24146 destmem = offset_address (destmem, GEN_INT (-2 * size),
24147 GET_MODE_SIZE (mode));
24148 if (!issetmem)
24150 srcmem = offset_address (srcmem, count, 1);
24151 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24152 GET_MODE_SIZE (mode));
24154 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24156 if (issetmem)
24157 emit_move_insn (destmem, gen_lowpart (mode, value));
24158 else
24160 emit_move_insn (destmem, srcmem);
24161 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24163 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24165 emit_jump_insn (gen_jump (done_label));
24166 emit_barrier ();
24168 emit_label (label);
24169 LABEL_NUSES (label) = 1;
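/* Illustrative sketch, not part of the port: a block of SIZE..2*SIZE-1
   bytes is handled above with two possibly overlapping SIZE-byte copies,
   one starting at the first byte and one ending at the last byte.  The
   function name is made up for illustration only.  */
#if 0
static void
small_copy_model (char *dst, const char *src, unsigned long count, int size)
{
  /* Assumes size <= count && count < 2 * (unsigned long) size.  */
  memcpy (dst, src, size);
  memcpy (dst + count - size, src + count - size, size);
}
#endif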
24172 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
24173 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
24174 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
24175 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
24176 DONE_LABEL is a label after the whole copying sequence. The label is created
24177 on demand if *DONE_LABEL is NULL.
24178 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
24179 bounds after the initial copies.
24181 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
24182 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
24183 we will dispatch to a library call for large blocks.
24185 In pseudocode we do:
24187 if (COUNT < SIZE)
24189 Assume that SIZE is 4. Bigger sizes are handled analogously
24190 if (COUNT & 4)
24192 copy 4 bytes from SRCPTR to DESTPTR
24193 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24194 goto done_label
24196 if (!COUNT)
24197 goto done_label;
24198 copy 1 byte from SRCPTR to DESTPTR
24199 if (COUNT & 2)
24201 copy 2 bytes from SRCPTR to DESTPTR
24202 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24205 else
24207 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24208 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
24210 OLD_DESTPTR = DESTPTR;
24211 Align DESTPTR up to DESIRED_ALIGN
24212 SRCPTR += DESTPTR - OLD_DESTPTR
24213 COUNT -= DESTPTR - OLD_DESTPTR
24214 if (DYNAMIC_CHECK)
24215 Round COUNT down to multiple of SIZE
24216 << optional caller supplied zero size guard is here >>
24217 << optional caller supplied dynamic check is here >>
24218 << caller supplied main copy loop is here >>
24220 done_label:
24222 static void
24223 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24224 rtx *destptr, rtx *srcptr,
24225 machine_mode mode,
24226 rtx value, rtx vec_value,
24227 rtx *count,
24228 rtx_code_label **done_label,
24229 int size,
24230 int desired_align,
24231 int align,
24232 unsigned HOST_WIDE_INT *min_size,
24233 bool dynamic_check,
24234 bool issetmem)
24236 rtx_code_label *loop_label = NULL, *label;
24237 int n;
24238 rtx modesize;
24239 int prolog_size = 0;
24240 rtx mode_value;
24242 /* Choose the proper value to copy. */
24243 if (issetmem && VECTOR_MODE_P (mode))
24244 mode_value = vec_value;
24245 else
24246 mode_value = value;
24247 gcc_assert (GET_MODE_SIZE (mode) <= size);
24249 /* See if block is big or small, handle small blocks. */
24250 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24252 int size2 = size;
24253 loop_label = gen_label_rtx ();
24255 if (!*done_label)
24256 *done_label = gen_label_rtx ();
24258 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24259 1, loop_label);
24260 size2 >>= 1;
24262 /* Handle sizes > 3. */
24263 for (;size2 > 2; size2 >>= 1)
24264 expand_small_movmem_or_setmem (destmem, srcmem,
24265 *destptr, *srcptr,
24266 value, vec_value,
24267 *count,
24268 size2, *done_label, issetmem);
24269 /* Nothing to copy? Jump to DONE_LABEL if so */
24270 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24271 1, *done_label);
24273 /* Do a byte copy. */
24274 destmem = change_address (destmem, QImode, *destptr);
24275 if (issetmem)
24276 emit_move_insn (destmem, gen_lowpart (QImode, value));
24277 else
24279 srcmem = change_address (srcmem, QImode, *srcptr);
24280 emit_move_insn (destmem, srcmem);
24283 /* Handle sizes 2 and 3. */
24284 label = ix86_expand_aligntest (*count, 2, false);
24285 destmem = change_address (destmem, HImode, *destptr);
24286 destmem = offset_address (destmem, *count, 1);
24287 destmem = offset_address (destmem, GEN_INT (-2), 2);
24288 if (issetmem)
24289 emit_move_insn (destmem, gen_lowpart (HImode, value));
24290 else
24292 srcmem = change_address (srcmem, HImode, *srcptr);
24293 srcmem = offset_address (srcmem, *count, 1);
24294 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24295 emit_move_insn (destmem, srcmem);
24298 emit_label (label);
24299 LABEL_NUSES (label) = 1;
24300 emit_jump_insn (gen_jump (*done_label));
24301 emit_barrier ();
24303 else
24304 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24305 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24307 /* Start memcpy for COUNT >= SIZE. */
24308 if (loop_label)
24310 emit_label (loop_label);
24311 LABEL_NUSES (loop_label) = 1;
24314 /* Copy first desired_align bytes. */
24315 if (!issetmem)
24316 srcmem = change_address (srcmem, mode, *srcptr);
24317 destmem = change_address (destmem, mode, *destptr);
24318 modesize = GEN_INT (GET_MODE_SIZE (mode));
24319 for (n = 0; prolog_size < desired_align - align; n++)
24321 if (issetmem)
24322 emit_move_insn (destmem, mode_value);
24323 else
24325 emit_move_insn (destmem, srcmem);
24326 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24328 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24329 prolog_size += GET_MODE_SIZE (mode);
24333 /* Copy last SIZE bytes. */
24334 destmem = offset_address (destmem, *count, 1);
24335 destmem = offset_address (destmem,
24336 GEN_INT (-size - prolog_size),
24338 if (issetmem)
24339 emit_move_insn (destmem, mode_value);
24340 else
24342 srcmem = offset_address (srcmem, *count, 1);
24343 srcmem = offset_address (srcmem,
24344 GEN_INT (-size - prolog_size),
24346 emit_move_insn (destmem, srcmem);
24348 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24350 destmem = offset_address (destmem, modesize, 1);
24351 if (issetmem)
24352 emit_move_insn (destmem, mode_value);
24353 else
24355 srcmem = offset_address (srcmem, modesize, 1);
24356 emit_move_insn (destmem, srcmem);
24360 /* Align destination. */
24361 if (desired_align > 1 && desired_align > align)
24363 rtx saveddest = *destptr;
24365 gcc_assert (desired_align <= size);
24366 /* Align destptr up, place it to new register. */
24367 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24368 GEN_INT (prolog_size),
24369 NULL_RTX, 1, OPTAB_DIRECT);
24370 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
24371 REG_POINTER (*destptr) = 1;
24372 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24373 GEN_INT (-desired_align),
24374 *destptr, 1, OPTAB_DIRECT);
24375 /* See how many bytes we skipped. */
24376 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24377 *destptr,
24378 saveddest, 1, OPTAB_DIRECT);
24379 /* Adjust srcptr and count. */
24380 if (!issetmem)
24381 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
24382 saveddest, *srcptr, 1, OPTAB_DIRECT);
24383 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24384 saveddest, *count, 1, OPTAB_DIRECT);
24385 /* We copied at most size + prolog_size. */
24386 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24387 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24388 else
24389 *min_size = 0;
24391 /* Our loops always round down the block size, but for dispatch to a library
24392 call we need the precise value. */
24393 if (dynamic_check)
24394 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24395 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24397 else
24399 gcc_assert (prolog_size == 0);
24400 /* Decrease count, so we won't end up copying last word twice. */
24401 if (!CONST_INT_P (*count))
24402 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24403 constm1_rtx, *count, 1, OPTAB_DIRECT);
24404 else
24405 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24406 if (*min_size)
24407 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24412 /* This function is like the previous one, except here we know how many bytes
24413 need to be copied. That allows us to update alignment not only of DST, which
24414 is returned, but also of SRC, which is passed as a pointer for that
24415 reason. */
24416 static rtx
24417 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24418 rtx srcreg, rtx value, rtx vec_value,
24419 int desired_align, int align_bytes,
24420 bool issetmem)
24422 rtx src = NULL;
24423 rtx orig_dst = dst;
24424 rtx orig_src = NULL;
24425 int piece_size = 1;
24426 int copied_bytes = 0;
24428 if (!issetmem)
24430 gcc_assert (srcp != NULL);
24431 src = *srcp;
24432 orig_src = src;
24435 for (piece_size = 1;
24436 piece_size <= desired_align && copied_bytes < align_bytes;
24437 piece_size <<= 1)
24439 if (align_bytes & piece_size)
24441 if (issetmem)
24443 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24444 dst = emit_memset (dst, destreg, vec_value, piece_size);
24445 else
24446 dst = emit_memset (dst, destreg, value, piece_size);
24448 else
24449 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24450 copied_bytes += piece_size;
24453 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24454 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24455 if (MEM_SIZE_KNOWN_P (orig_dst))
24456 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24458 if (!issetmem)
24460 int src_align_bytes = get_mem_align_offset (src, desired_align
24461 * BITS_PER_UNIT);
24462 if (src_align_bytes >= 0)
24463 src_align_bytes = desired_align - src_align_bytes;
24464 if (src_align_bytes >= 0)
24466 unsigned int src_align;
24467 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24469 if ((src_align_bytes & (src_align - 1))
24470 == (align_bytes & (src_align - 1)))
24471 break;
24473 if (src_align > (unsigned int) desired_align)
24474 src_align = desired_align;
24475 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24476 set_mem_align (src, src_align * BITS_PER_UNIT);
24478 if (MEM_SIZE_KNOWN_P (orig_src))
24479 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24480 *srcp = src;
24483 return dst;
24486 /* Return true if ALG can be used in current context.
24487 Assume we expand memset if MEMSET is true. */
24488 static bool
24489 alg_usable_p (enum stringop_alg alg, bool memset)
24491 if (alg == no_stringop)
24492 return false;
24493 if (alg == vector_loop)
24494 return TARGET_SSE || TARGET_AVX;
24495 /* Algorithms using the rep prefix want at least edi and ecx;
24496 additionally, memset wants eax and memcpy wants esi. Don't
24497 consider such algorithms if the user has appropriated those
24498 registers for their own purposes. */
24499 if (alg == rep_prefix_1_byte
24500 || alg == rep_prefix_4_byte
24501 || alg == rep_prefix_8_byte)
24502 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24503 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24504 return true;
24507 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
24508 static enum stringop_alg
24509 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24510 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24511 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24513 const struct stringop_algs * algs;
24514 bool optimize_for_speed;
24515 int max = 0;
24516 const struct processor_costs *cost;
24517 int i;
24518 bool any_alg_usable_p = false;
24520 *noalign = false;
24521 *dynamic_check = -1;
24523 /* Even if the string operation call is cold, we still might spend a lot
24524 of time processing large blocks. */
24525 if (optimize_function_for_size_p (cfun)
24526 || (optimize_insn_for_size_p ()
24527 && (max_size < 256
24528 || (expected_size != -1 && expected_size < 256))))
24529 optimize_for_speed = false;
24530 else
24531 optimize_for_speed = true;
24533 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24534 if (memset)
24535 algs = &cost->memset[TARGET_64BIT != 0];
24536 else
24537 algs = &cost->memcpy[TARGET_64BIT != 0];
24539 /* See maximal size for user defined algorithm. */
24540 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24542 enum stringop_alg candidate = algs->size[i].alg;
24543 bool usable = alg_usable_p (candidate, memset);
24544 any_alg_usable_p |= usable;
24546 if (candidate != libcall && candidate && usable)
24547 max = algs->size[i].max;
24550 /* If the expected size is not known but the max size is small enough
24551 that the inline version is a win, set the expected size into
24552 the range. */
24553 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24554 && expected_size == -1)
24555 expected_size = min_size / 2 + max_size / 2;
24557 /* If the user specified the algorithm, honor it if possible. */
24558 if (ix86_stringop_alg != no_stringop
24559 && alg_usable_p (ix86_stringop_alg, memset))
24560 return ix86_stringop_alg;
24561 /* rep; movq or rep; movl is the smallest variant. */
24562 else if (!optimize_for_speed)
24564 *noalign = true;
24565 if (!count || (count & 3) || (memset && !zero_memset))
24566 return alg_usable_p (rep_prefix_1_byte, memset)
24567 ? rep_prefix_1_byte : loop_1_byte;
24568 else
24569 return alg_usable_p (rep_prefix_4_byte, memset)
24570 ? rep_prefix_4_byte : loop;
24572 /* Very tiny blocks are best handled via the loop; REP is expensive to
24573 set up. */
24574 else if (expected_size != -1 && expected_size < 4)
24575 return loop_1_byte;
24576 else if (expected_size != -1)
24578 enum stringop_alg alg = libcall;
24579 bool alg_noalign = false;
24580 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24582 /* We get here if the algorithms that were not libcall-based
24583 were rep-prefix based and we are unable to use rep prefixes
24584 based on global register usage. Break out of the loop and
24585 use the heuristic below. */
24586 if (algs->size[i].max == 0)
24587 break;
24588 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24590 enum stringop_alg candidate = algs->size[i].alg;
24592 if (candidate != libcall && alg_usable_p (candidate, memset))
24594 alg = candidate;
24595 alg_noalign = algs->size[i].noalign;
24597 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24598 last non-libcall inline algorithm. */
24599 if (TARGET_INLINE_ALL_STRINGOPS)
24601 /* When the current size is best copied by a libcall,
24602 but we are still forced to inline, run the heuristic below
24603 that will pick code for medium-sized blocks. */
24604 if (alg != libcall)
24606 *noalign = alg_noalign;
24607 return alg;
24609 else if (!any_alg_usable_p)
24610 break;
24612 else if (alg_usable_p (candidate, memset))
24614 *noalign = algs->size[i].noalign;
24615 return candidate;
24620 /* When asked to inline the call anyway, try to pick a meaningful choice.
24621 We look for the maximal block size that is faster to copy by hand and
24622 take blocks of at most that size, guessing that the average size will
24623 be roughly half of the maximum.
24625 If this turns out to be bad, we might simply specify the preferred
24626 choice in ix86_costs. */
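  /* Illustrative numbers (an assumption, not taken from any cost table):
     if the loop above found max == 4096 (or MAX defaulted to 4096 because
     no non-libcall entry applied), the recursive call below runs with
     expected_size == 4096 / 2 == 2048 and picks whatever algorithm the
     table prefers for medium blocks; with -minline-stringops-dynamically
     *dynamic_check is then set to 4096, so larger blocks still go through
     the library call at run time.  */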
24627 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24628 && (algs->unknown_size == libcall
24629 || !alg_usable_p (algs->unknown_size, memset)))
24631 enum stringop_alg alg;
24633 /* If there aren't any usable algorithms, then recursing on
24634 smaller sizes isn't going to find anything. Just return the
24635 simple byte-at-a-time copy loop. */
24636 if (!any_alg_usable_p)
24638 /* Pick something reasonable. */
24639 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24640 *dynamic_check = 128;
24641 return loop_1_byte;
24643 if (max <= 0)
24644 max = 4096;
24645 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24646 zero_memset, dynamic_check, noalign);
24647 gcc_assert (*dynamic_check == -1);
24648 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24649 *dynamic_check = max;
24650 else
24651 gcc_assert (alg != libcall);
24652 return alg;
24654 return (alg_usable_p (algs->unknown_size, memset)
24655 ? algs->unknown_size : libcall);
24658 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24659 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24660 static int
24661 decide_alignment (int align,
24662 enum stringop_alg alg,
24663 int expected_size,
24664 machine_mode move_mode)
24666 int desired_align = 0;
24668 gcc_assert (alg != no_stringop);
24670 if (alg == libcall)
24671 return 0;
24672 if (move_mode == VOIDmode)
24673 return 0;
24675 desired_align = GET_MODE_SIZE (move_mode);
24676 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
24677 copying the whole cache line at once. */
24678 if (TARGET_PENTIUMPRO
24679 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24680 desired_align = 8;
24682 if (optimize_size)
24683 desired_align = 1;
24684 if (desired_align < align)
24685 desired_align = align;
24686 if (expected_size != -1 && expected_size < 4)
24687 desired_align = align;
24689 return desired_align;
24693 /* Helper function for memset. For QImode value 0xXY produce
24694 0xXYXYXYXY of the width specified by MODE. This is essentially
24695 a * 0x01010101, but we can do slightly better than
24696 synth_mult by unwinding the sequence by hand on CPUs with
24697 slow multiply. */
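/* Worked example (illustrative): for VAL == 0xAB and MODE == SImode the
   constant path below computes
     v = 0xAB;  v |= v << 8;   -> 0xABAB
                v |= v << 16;  -> 0xABABABAB
   which is exactly 0xAB * 0x01010101.  A non-constant QImode value is
   promoted either by multiplying with the promoted constant 0x01010101 or
   by the equivalent shift-and-IOR sequence, whichever the cost model
   rates cheaper.  */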
24698 static rtx
24699 promote_duplicated_reg (machine_mode mode, rtx val)
24701 machine_mode valmode = GET_MODE (val);
24702 rtx tmp;
24703 int nops = mode == DImode ? 3 : 2;
24705 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24706 if (val == const0_rtx)
24707 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24708 if (CONST_INT_P (val))
24710 HOST_WIDE_INT v = INTVAL (val) & 255;
24712 v |= v << 8;
24713 v |= v << 16;
24714 if (mode == DImode)
24715 v |= (v << 16) << 16;
24716 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24719 if (valmode == VOIDmode)
24720 valmode = QImode;
24721 if (valmode != QImode)
24722 val = gen_lowpart (QImode, val);
24723 if (mode == QImode)
24724 return val;
24725 if (!TARGET_PARTIAL_REG_STALL)
24726 nops--;
24727 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24728 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24729 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24730 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24732 rtx reg = convert_modes (mode, QImode, val, true);
24733 tmp = promote_duplicated_reg (mode, const1_rtx);
24734 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24735 OPTAB_DIRECT);
24737 else
24739 rtx reg = convert_modes (mode, QImode, val, true);
24741 if (!TARGET_PARTIAL_REG_STALL)
24742 if (mode == SImode)
24743 emit_insn (gen_movsi_insv_1 (reg, reg));
24744 else
24745 emit_insn (gen_movdi_insv_1 (reg, reg));
24746 else
24748 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24749 NULL, 1, OPTAB_DIRECT);
24750 reg =
24751 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24753 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24754 NULL, 1, OPTAB_DIRECT);
24755 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24756 if (mode == SImode)
24757 return reg;
24758 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24759 NULL, 1, OPTAB_DIRECT);
24760 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24761 return reg;
24765 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
24766 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
24767 alignment from ALIGN to DESIRED_ALIGN. */
24768 static rtx
24769 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24770 int align)
24772 rtx promoted_val;
24774 if (TARGET_64BIT
24775 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24776 promoted_val = promote_duplicated_reg (DImode, val);
24777 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24778 promoted_val = promote_duplicated_reg (SImode, val);
24779 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24780 promoted_val = promote_duplicated_reg (HImode, val);
24781 else
24782 promoted_val = val;
24784 return promoted_val;
24787 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24788 operations when profitable. The code depends upon architecture, block size
24789 and alignment, but always has one of the following overall structures:
24791 Aligned move sequence:
24793 1) Prologue guard: Conditional that jumps up to epilogues for small
24794 blocks that can be handled by epilogue alone. This is faster
24795 but also needed for correctness, since the prologue assumes the block
24796 is larger than the desired alignment.
24798 Optional dynamic check for size and libcall for large
24799 blocks is emitted here too, with -minline-stringops-dynamically.
24801 2) Prologue: copy first few bytes in order to get destination
24802 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24803 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24804 copied. We emit either a jump tree on power of two sized
24805 blocks, or a byte loop.
24807 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24808 with specified algorithm.
24810 4) Epilogue: code copying tail of the block that is too small to be
24811 handled by main body (or up to size guarded by prologue guard).
24813 Misaligned move sequence
24815 1) misaligned move prologue/epilogue containing:
24816 a) Prologue handling small memory blocks and jumping to done_label
24817 (skipped if blocks are known to be large enough)
24818 b) Single move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
24819 needed by single possibly misaligned move
24820 (skipped if alignment is not needed)
24821 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24823 2) Zero size guard dispatching to done_label, if needed
24825 3) dispatch to library call, if needed,
24827 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24828 with specified algorithm. */
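/* Illustrative example (an assumption, not generated output): for a memset
   of N bytes with alg == rep_prefix_4_byte and DESIRED_ALIGN == 4, the
   aligned sequence roughly corresponds to

     if (N < epilogue_size_needed) goto epilogue;        // 1) guard
     while ((uintptr_t) dst & 3)                         // 2) prologue
       { *dst++ = val; N--; }
     emit "rep stosl" for N / 4 dwords;                  // 3) main body
   epilogue:
     store the remaining N & 3 bytes;                    // 4) epilogue
*/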
24829 bool
24830 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24831 rtx align_exp, rtx expected_align_exp,
24832 rtx expected_size_exp, rtx min_size_exp,
24833 rtx max_size_exp, rtx probable_max_size_exp,
24834 bool issetmem)
24836 rtx destreg;
24837 rtx srcreg = NULL;
24838 rtx_code_label *label = NULL;
24839 rtx tmp;
24840 rtx_code_label *jump_around_label = NULL;
24841 HOST_WIDE_INT align = 1;
24842 unsigned HOST_WIDE_INT count = 0;
24843 HOST_WIDE_INT expected_size = -1;
24844 int size_needed = 0, epilogue_size_needed;
24845 int desired_align = 0, align_bytes = 0;
24846 enum stringop_alg alg;
24847 rtx promoted_val = NULL;
24848 rtx vec_promoted_val = NULL;
24849 bool force_loopy_epilogue = false;
24850 int dynamic_check;
24851 bool need_zero_guard = false;
24852 bool noalign;
24853 machine_mode move_mode = VOIDmode;
24854 int unroll_factor = 1;
24855 /* TODO: Once value ranges are available, fill in proper data. */
24856 unsigned HOST_WIDE_INT min_size = 0;
24857 unsigned HOST_WIDE_INT max_size = -1;
24858 unsigned HOST_WIDE_INT probable_max_size = -1;
24859 bool misaligned_prologue_used = false;
24861 if (CONST_INT_P (align_exp))
24862 align = INTVAL (align_exp);
24863 /* i386 can do misaligned access at a reasonably increased cost. */
24864 if (CONST_INT_P (expected_align_exp)
24865 && INTVAL (expected_align_exp) > align)
24866 align = INTVAL (expected_align_exp);
24867 /* ALIGN is the minimum of destination and source alignment, but we care here
24868 just about destination alignment. */
24869 else if (!issetmem
24870 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24871 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24873 if (CONST_INT_P (count_exp))
24875 min_size = max_size = probable_max_size = count = expected_size
24876 = INTVAL (count_exp);
24877 /* When COUNT is 0, there is nothing to do. */
24878 if (!count)
24879 return true;
24881 else
24883 if (min_size_exp)
24884 min_size = INTVAL (min_size_exp);
24885 if (max_size_exp)
24886 max_size = INTVAL (max_size_exp);
24887 if (probable_max_size_exp)
24888 probable_max_size = INTVAL (probable_max_size_exp);
24889 if (CONST_INT_P (expected_size_exp))
24890 expected_size = INTVAL (expected_size_exp);
24893 /* Make sure we don't need to care about overflow later on. */
24894 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24895 return false;
24897 /* Step 0: Decide on preferred algorithm, desired alignment and
24898 size of chunks to be copied by main loop. */
24899 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24900 issetmem,
24901 issetmem && val_exp == const0_rtx,
24902 &dynamic_check, &noalign);
24903 if (alg == libcall)
24904 return false;
24905 gcc_assert (alg != no_stringop);
24907 /* For now the vector version of memset is generated only for memory zeroing,
24908 as creating the promoted vector value is very cheap in this case. */
24909 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24910 alg = unrolled_loop;
24912 if (!count)
24913 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24914 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24915 if (!issetmem)
24916 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24918 unroll_factor = 1;
24919 move_mode = word_mode;
24920 switch (alg)
24922 case libcall:
24923 case no_stringop:
24924 case last_alg:
24925 gcc_unreachable ();
24926 case loop_1_byte:
24927 need_zero_guard = true;
24928 move_mode = QImode;
24929 break;
24930 case loop:
24931 need_zero_guard = true;
24932 break;
24933 case unrolled_loop:
24934 need_zero_guard = true;
24935 unroll_factor = (TARGET_64BIT ? 4 : 2);
24936 break;
24937 case vector_loop:
24938 need_zero_guard = true;
24939 unroll_factor = 4;
24940 /* Find the widest supported mode. */
24941 move_mode = word_mode;
24942 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24943 != CODE_FOR_nothing)
24944 move_mode = GET_MODE_WIDER_MODE (move_mode);
24946 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24947 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24948 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24950 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24951 move_mode = mode_for_vector (word_mode, nunits);
24952 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24953 move_mode = word_mode;
24955 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24956 break;
24957 case rep_prefix_8_byte:
24958 move_mode = DImode;
24959 break;
24960 case rep_prefix_4_byte:
24961 move_mode = SImode;
24962 break;
24963 case rep_prefix_1_byte:
24964 move_mode = QImode;
24965 break;
24967 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24968 epilogue_size_needed = size_needed;
24970 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24971 if (!TARGET_ALIGN_STRINGOPS || noalign)
24972 align = desired_align;
24974 /* Step 1: Prologue guard. */
24976 /* Alignment code needs count to be in register. */
24977 if (CONST_INT_P (count_exp) && desired_align > align)
24979 if (INTVAL (count_exp) > desired_align
24980 && INTVAL (count_exp) > size_needed)
24982 align_bytes
24983 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24984 if (align_bytes <= 0)
24985 align_bytes = 0;
24986 else
24987 align_bytes = desired_align - align_bytes;
24989 if (align_bytes == 0)
24990 count_exp = force_reg (counter_mode (count_exp), count_exp);
24992 gcc_assert (desired_align >= 1 && align >= 1);
24994 /* Misaligned move sequences handle both prologue and epilogue at once.
24995 Default code generation results in smaller code for large alignments
24996 and also avoids redundant work when sizes are known precisely. */
24997 misaligned_prologue_used
24998 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24999 && MAX (desired_align, epilogue_size_needed) <= 32
25000 && desired_align <= epilogue_size_needed
25001 && ((desired_align > align && !align_bytes)
25002 || (!count && epilogue_size_needed > 1)));
25004 /* Do the cheap promotion to allow better CSE across the
25005 main loop and epilogue (i.e. one load of the big constant in
25006 front of all the code).
25007 For now the misaligned move sequences do not have a fast path
25008 without broadcasting. */
25009 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
25011 if (alg == vector_loop)
25013 gcc_assert (val_exp == const0_rtx);
25014 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
25015 promoted_val = promote_duplicated_reg_to_size (val_exp,
25016 GET_MODE_SIZE (word_mode),
25017 desired_align, align);
25019 else
25021 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25022 desired_align, align);
25025 /* Misaligned move sequences handle both prologues and epilogues at once.
25026 Default code generation results in smaller code for large alignments and
25027 also avoids redundant work when sizes are known precisely. */
25028 if (misaligned_prologue_used)
25030 /* The misaligned move prologue handles small blocks by itself. */
25031 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
25032 (dst, src, &destreg, &srcreg,
25033 move_mode, promoted_val, vec_promoted_val,
25034 &count_exp,
25035 &jump_around_label,
25036 desired_align < align
25037 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
25038 desired_align, align, &min_size, dynamic_check, issetmem);
25039 if (!issetmem)
25040 src = change_address (src, BLKmode, srcreg);
25041 dst = change_address (dst, BLKmode, destreg);
25042 set_mem_align (dst, desired_align * BITS_PER_UNIT);
25043 epilogue_size_needed = 0;
25044 if (need_zero_guard && !min_size)
25046 /* It is possible that we copied enough so the main loop will not
25047 execute. */
25048 gcc_assert (size_needed > 1);
25049 if (jump_around_label == NULL_RTX)
25050 jump_around_label = gen_label_rtx ();
25051 emit_cmp_and_jump_insns (count_exp,
25052 GEN_INT (size_needed),
25053 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
25054 if (expected_size == -1
25055 || expected_size < (desired_align - align) / 2 + size_needed)
25056 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25057 else
25058 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25061 /* Ensure that alignment prologue won't copy past end of block. */
25062 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
25064 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
25065 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
25066 Make sure it is power of 2. */
25067 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
25069 /* To improve performance of small blocks, we jump around the VAL
25070 promoting code. This means that if the promoted VAL is not constant,
25071 we might not use it in the epilogue and have to use the byte
25072 loop variant. */
25073 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
25074 force_loopy_epilogue = true;
25075 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25076 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25078 /* If main algorithm works on QImode, no epilogue is needed.
25079 For small sizes just don't align anything. */
25080 if (size_needed == 1)
25081 desired_align = align;
25082 else
25083 goto epilogue;
25085 else if (!count
25086 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25088 label = gen_label_rtx ();
25089 emit_cmp_and_jump_insns (count_exp,
25090 GEN_INT (epilogue_size_needed),
25091 LTU, 0, counter_mode (count_exp), 1, label);
25092 if (expected_size == -1 || expected_size < epilogue_size_needed)
25093 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25094 else
25095 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25099 /* Emit code to decide on runtime whether library call or inline should be
25100 used. */
25101 if (dynamic_check != -1)
25103 if (!issetmem && CONST_INT_P (count_exp))
25105 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
25107 emit_block_move_via_libcall (dst, src, count_exp, false);
25108 count_exp = const0_rtx;
25109 goto epilogue;
25112 else
25114 rtx_code_label *hot_label = gen_label_rtx ();
25115 if (jump_around_label == NULL_RTX)
25116 jump_around_label = gen_label_rtx ();
25117 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
25118 LEU, 0, counter_mode (count_exp),
25119 1, hot_label);
25120 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25121 if (issetmem)
25122 set_storage_via_libcall (dst, count_exp, val_exp, false);
25123 else
25124 emit_block_move_via_libcall (dst, src, count_exp, false);
25125 emit_jump (jump_around_label);
25126 emit_label (hot_label);
25130 /* Step 2: Alignment prologue. */
25131 /* Do the expensive promotion once we branched off the small blocks. */
25132 if (issetmem && !promoted_val)
25133 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25134 desired_align, align);
25136 if (desired_align > align && !misaligned_prologue_used)
25138 if (align_bytes == 0)
25140 /* Except for the first move in the prologue, we no longer know
25141 the constant offset in the aliasing info. It doesn't seem worth
25142 the pain to maintain it for the first move, so throw away
25143 the info early. */
25144 dst = change_address (dst, BLKmode, destreg);
25145 if (!issetmem)
25146 src = change_address (src, BLKmode, srcreg);
25147 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25148 promoted_val, vec_promoted_val,
25149 count_exp, align, desired_align,
25150 issetmem);
25151 /* At most desired_align - align bytes are copied. */
25152 if (min_size < (unsigned)(desired_align - align))
25153 min_size = 0;
25154 else
25155 min_size -= desired_align - align;
25157 else
25159 /* If we know how many bytes need to be stored before dst is
25160 sufficiently aligned, maintain aliasing info accurately. */
25161 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25162 srcreg,
25163 promoted_val,
25164 vec_promoted_val,
25165 desired_align,
25166 align_bytes,
25167 issetmem);
25169 count_exp = plus_constant (counter_mode (count_exp),
25170 count_exp, -align_bytes);
25171 count -= align_bytes;
25172 min_size -= align_bytes;
25173 max_size -= align_bytes;
25175 if (need_zero_guard
25176 && !min_size
25177 && (count < (unsigned HOST_WIDE_INT) size_needed
25178 || (align_bytes == 0
25179 && count < ((unsigned HOST_WIDE_INT) size_needed
25180 + desired_align - align))))
25182 /* It is possible that we copied enough so the main loop will not
25183 execute. */
25184 gcc_assert (size_needed > 1);
25185 if (label == NULL_RTX)
25186 label = gen_label_rtx ();
25187 emit_cmp_and_jump_insns (count_exp,
25188 GEN_INT (size_needed),
25189 LTU, 0, counter_mode (count_exp), 1, label);
25190 if (expected_size == -1
25191 || expected_size < (desired_align - align) / 2 + size_needed)
25192 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25193 else
25194 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25197 if (label && size_needed == 1)
25199 emit_label (label);
25200 LABEL_NUSES (label) = 1;
25201 label = NULL;
25202 epilogue_size_needed = 1;
25203 if (issetmem)
25204 promoted_val = val_exp;
25206 else if (label == NULL_RTX && !misaligned_prologue_used)
25207 epilogue_size_needed = size_needed;
25209 /* Step 3: Main loop. */
25211 switch (alg)
25213 case libcall:
25214 case no_stringop:
25215 case last_alg:
25216 gcc_unreachable ();
25217 case loop_1_byte:
25218 case loop:
25219 case unrolled_loop:
25220 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25221 count_exp, move_mode, unroll_factor,
25222 expected_size, issetmem);
25223 break;
25224 case vector_loop:
25225 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25226 vec_promoted_val, count_exp, move_mode,
25227 unroll_factor, expected_size, issetmem);
25228 break;
25229 case rep_prefix_8_byte:
25230 case rep_prefix_4_byte:
25231 case rep_prefix_1_byte:
25232 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25233 val_exp, count_exp, move_mode, issetmem);
25234 break;
25236 /* Adjust properly the offset of src and dest memory for aliasing. */
25237 if (CONST_INT_P (count_exp))
25239 if (!issetmem)
25240 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25241 (count / size_needed) * size_needed);
25242 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25243 (count / size_needed) * size_needed);
25245 else
25247 if (!issetmem)
25248 src = change_address (src, BLKmode, srcreg);
25249 dst = change_address (dst, BLKmode, destreg);
25252 /* Step 4: Epilogue to copy the remaining bytes. */
25253 epilogue:
25254 if (label)
25256 /* When the main loop is done, COUNT_EXP might hold original count,
25257 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
25258 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
25259 bytes. Compensate if needed. */
25261 if (size_needed < epilogue_size_needed)
25263 tmp =
25264 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25265 GEN_INT (size_needed - 1), count_exp, 1,
25266 OPTAB_DIRECT);
25267 if (tmp != count_exp)
25268 emit_move_insn (count_exp, tmp);
25270 emit_label (label);
25271 LABEL_NUSES (label) = 1;
25274 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25276 if (force_loopy_epilogue)
25277 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25278 epilogue_size_needed);
25279 else
25281 if (issetmem)
25282 expand_setmem_epilogue (dst, destreg, promoted_val,
25283 vec_promoted_val, count_exp,
25284 epilogue_size_needed);
25285 else
25286 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25287 epilogue_size_needed);
25290 if (jump_around_label)
25291 emit_label (jump_around_label);
25292 return true;
25296 /* Expand the appropriate insns for doing strlen if not just doing
25297 repnz; scasb
25299 out = result, initialized with the start address
25300 align_rtx = alignment of the address.
25301 scratch = scratch register, initialized with the start address when
25302 not aligned, otherwise undefined
25304 This is just the body. It needs the initializations mentioned above and
25305 some address computing at the end. These things are done in i386.md. */
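/* A rough C-level sketch of what this expander emits (an illustration only;
   the real output is RTL and the final adjustment is done branch-free with
   a carry trick further below):

     char *p = out;                          // out = start address
     while ((uintptr_t) p & 3)               // check up to 3 unaligned bytes
       { if (*p == 0) goto done; p++; }
     for (;;)                                // 4 bytes per iteration
       {
         unsigned x = *(unsigned *) p;
         p += 4;
         if ((x - 0x01010101U) & ~x & 0x80808080U)  // some byte is zero
           break;
       }
     p -= 4;
     while (*p != 0)                          // locate the zero byte
       p++;
   done: ;                                    // p points at the NUL  */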
25307 static void
25308 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25310 int align;
25311 rtx tmp;
25312 rtx_code_label *align_2_label = NULL;
25313 rtx_code_label *align_3_label = NULL;
25314 rtx_code_label *align_4_label = gen_label_rtx ();
25315 rtx_code_label *end_0_label = gen_label_rtx ();
25316 rtx mem;
25317 rtx tmpreg = gen_reg_rtx (SImode);
25318 rtx scratch = gen_reg_rtx (SImode);
25319 rtx cmp;
25321 align = 0;
25322 if (CONST_INT_P (align_rtx))
25323 align = INTVAL (align_rtx);
25325 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25327 /* Is there a known alignment and is it less than 4? */
25328 if (align < 4)
25330 rtx scratch1 = gen_reg_rtx (Pmode);
25331 emit_move_insn (scratch1, out);
25332 /* Is there a known alignment and is it not 2? */
25333 if (align != 2)
25335 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25336 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25338 /* Leave just the 3 lower bits. */
25339 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25340 NULL_RTX, 0, OPTAB_WIDEN);
25342 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25343 Pmode, 1, align_4_label);
25344 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25345 Pmode, 1, align_2_label);
25346 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25347 Pmode, 1, align_3_label);
25349 else
25351 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25352 check if it is aligned to 4 bytes. */
25354 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25355 NULL_RTX, 0, OPTAB_WIDEN);
25357 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25358 Pmode, 1, align_4_label);
25361 mem = change_address (src, QImode, out);
25363 /* Now compare the bytes. */
25365 /* Compare the first n unaligned bytes on a byte per byte basis. */
25366 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25367 QImode, 1, end_0_label);
25369 /* Increment the address. */
25370 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25372 /* Not needed with an alignment of 2 */
25373 if (align != 2)
25375 emit_label (align_2_label);
25377 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25378 end_0_label);
25380 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25382 emit_label (align_3_label);
25385 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25386 end_0_label);
25388 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25391 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
25392 align this loop; that only makes programs bigger and does not
25393 speed them up. */
25394 emit_label (align_4_label);
25396 mem = change_address (src, SImode, out);
25397 emit_move_insn (scratch, mem);
25398 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25400 /* This formula yields a nonzero result iff one of the bytes is zero.
25401 This saves three branches inside the loop and many cycles. */
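   /* For instance (illustrative), with scratch == 0x44003322 (whose third
      byte is zero):
        scratch + (-0x01010101) == 0x42FF3221
        ~scratch                == 0xBBFFCCDD
        AND of the two          == 0x02FF0001
        AND with 0x80808080     == 0x00800000   -> nonzero, a zero byte found
      whereas a value with no zero byte, e.g. 0x01010101, yields 0.  */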
25403 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25404 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25405 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25406 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25407 gen_int_mode (0x80808080, SImode)));
25408 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25409 align_4_label);
25411 if (TARGET_CMOVE)
25413 rtx reg = gen_reg_rtx (SImode);
25414 rtx reg2 = gen_reg_rtx (Pmode);
25415 emit_move_insn (reg, tmpreg);
25416 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25418 /* If zero is not in the first two bytes, move two bytes forward. */
25419 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25420 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25421 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25422 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
25423 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25424 reg,
25425 tmpreg)));
25426 /* Emit lea manually to avoid clobbering of flags. */
25427 emit_insn (gen_rtx_SET (SImode, reg2,
25428 gen_rtx_PLUS (Pmode, out, const2_rtx)));
25430 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25431 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25432 emit_insn (gen_rtx_SET (VOIDmode, out,
25433 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25434 reg2,
25435 out)));
25437 else
25439 rtx_code_label *end_2_label = gen_label_rtx ();
25440 /* Is zero in the first two bytes? */
25442 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25443 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25444 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25445 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25446 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25447 pc_rtx);
25448 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25449 JUMP_LABEL (tmp) = end_2_label;
25451 /* Not in the first two. Move two bytes forward. */
25452 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25453 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25455 emit_label (end_2_label);
25459 /* Avoid branch in fixing the byte. */
25460 tmpreg = gen_lowpart (QImode, tmpreg);
25461 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25462 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25463 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25464 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25466 emit_label (end_0_label);
25469 /* Expand strlen. */
25471 bool
25472 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25474 rtx addr, scratch1, scratch2, scratch3, scratch4;
25476 /* The generic case of the strlen expander is long. Avoid
25477 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
25479 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25480 && !TARGET_INLINE_ALL_STRINGOPS
25481 && !optimize_insn_for_size_p ()
25482 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25483 return false;
25485 addr = force_reg (Pmode, XEXP (src, 0));
25486 scratch1 = gen_reg_rtx (Pmode);
25488 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25489 && !optimize_insn_for_size_p ())
25491 /* It seems that some optimizers do not combine a call like
25492 foo(strlen(bar), strlen(bar));
25493 when the move and the subtraction are done here. They do calculate
25494 the length just once when these instructions are emitted inside
25495 output_strlen_unroll(). But since &bar[strlen(bar)] is
25496 often used and this uses one fewer register for the lifetime of
25497 output_strlen_unroll(), this is better. */
25499 emit_move_insn (out, addr);
25501 ix86_expand_strlensi_unroll_1 (out, src, align);
25503 /* strlensi_unroll_1 returns the address of the zero at the end of
25504 the string, like memchr(), so compute the length by subtracting
25505 the start address. */
25506 emit_insn (ix86_gen_sub3 (out, out, addr));
25508 else
25510 rtx unspec;
25512 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25513 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25514 return false;
25516 scratch2 = gen_reg_rtx (Pmode);
25517 scratch3 = gen_reg_rtx (Pmode);
25518 scratch4 = force_reg (Pmode, constm1_rtx);
25520 emit_move_insn (scratch3, addr);
25521 eoschar = force_reg (QImode, eoschar);
25523 src = replace_equiv_address_nv (src, scratch3);
25525 /* If .md starts supporting :P, this can be done in .md. */
25526 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25527 scratch4), UNSPEC_SCAS);
25528 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25529 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25530 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25532 return true;
25535 /* For a given symbol (function) construct code to compute the address of its
25536 PLT entry in the large x86-64 PIC model. */
25537 static rtx
25538 construct_plt_address (rtx symbol)
25540 rtx tmp, unspec;
25542 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25543 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25544 gcc_assert (Pmode == DImode);
25546 tmp = gen_reg_rtx (Pmode);
25547 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25549 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25550 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25551 return tmp;
25555 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25556 rtx callarg2,
25557 rtx pop, bool sibcall)
25559 rtx vec[3];
25560 rtx use = NULL, call;
25561 unsigned int vec_len = 0;
25563 if (pop == const0_rtx)
25564 pop = NULL;
25565 gcc_assert (!TARGET_64BIT || !pop);
25567 if (TARGET_MACHO && !TARGET_64BIT)
25569 #if TARGET_MACHO
25570 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25571 fnaddr = machopic_indirect_call_target (fnaddr);
25572 #endif
25574 else
25576 /* Static functions and indirect calls don't need the pic register. */
25577 if (flag_pic
25578 && (!TARGET_64BIT
25579 || (ix86_cmodel == CM_LARGE_PIC
25580 && DEFAULT_ABI != MS_ABI))
25581 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25582 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25584 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25585 if (ix86_use_pseudo_pic_reg ())
25586 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25587 pic_offset_table_rtx);
25591 /* Skip setting up RAX register for -mskip-rax-setup when there are no
25592 parameters passed in vector registers. */
25593 if (TARGET_64BIT
25594 && (INTVAL (callarg2) > 0
25595 || (INTVAL (callarg2) == 0
25596 && (TARGET_SSE || !flag_skip_rax_setup))))
25598 rtx al = gen_rtx_REG (QImode, AX_REG);
25599 emit_move_insn (al, callarg2);
25600 use_reg (&use, al);
25603 if (ix86_cmodel == CM_LARGE_PIC
25604 && !TARGET_PECOFF
25605 && MEM_P (fnaddr)
25606 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25607 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25608 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25609 else if (sibcall
25610 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25611 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25613 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25614 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25617 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25619 if (retval)
25621 /* We should add bounds as a destination register in case
25622 a pointer with bounds may be returned. */
25623 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25625 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25626 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25627 retval = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, retval, b0, b1));
25628 chkp_put_regs_to_expr_list (retval);
25631 call = gen_rtx_SET (VOIDmode, retval, call);
25633 vec[vec_len++] = call;
25635 if (pop)
25637 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25638 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25639 vec[vec_len++] = pop;
25642 if (TARGET_64BIT_MS_ABI
25643 && (!callarg2 || INTVAL (callarg2) != -2))
25645 int const cregs_size
25646 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25647 int i;
25649 for (i = 0; i < cregs_size; i++)
25651 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25652 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25654 clobber_reg (&use, gen_rtx_REG (mode, regno));
25658 if (vec_len > 1)
25659 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25660 call = emit_call_insn (call);
25661 if (use)
25662 CALL_INSN_FUNCTION_USAGE (call) = use;
25664 return call;
25667 /* Output the assembly for a call instruction. */
25669 const char *
25670 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25672 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25673 bool seh_nop_p = false;
25674 const char *xasm;
25676 if (SIBLING_CALL_P (insn))
25678 if (direct_p)
25679 xasm = "%!jmp\t%P0";
25680 /* SEH epilogue detection requires the indirect branch case
25681 to include REX.W. */
25682 else if (TARGET_SEH)
25683 xasm = "%!rex.W jmp %A0";
25684 else
25685 xasm = "%!jmp\t%A0";
25687 output_asm_insn (xasm, &call_op);
25688 return "";
25691 /* SEH unwinding can require an extra nop to be emitted in several
25692 circumstances. Determine if we have one of those. */
25693 if (TARGET_SEH)
25695 rtx_insn *i;
25697 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25699 /* If we get to another real insn, we don't need the nop. */
25700 if (INSN_P (i))
25701 break;
25703 /* If we get to the epilogue note, prevent a catch region from
25704 being adjacent to the standard epilogue sequence. If non-
25705 call-exceptions, we'll have done this during epilogue emission. */
25706 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25707 && !flag_non_call_exceptions
25708 && !can_throw_internal (insn))
25710 seh_nop_p = true;
25711 break;
25715 /* If we didn't find a real insn following the call, prevent the
25716 unwinder from looking into the next function. */
25717 if (i == NULL)
25718 seh_nop_p = true;
25721 if (direct_p)
25722 xasm = "%!call\t%P0";
25723 else
25724 xasm = "%!call\t%A0";
25726 output_asm_insn (xasm, &call_op);
25728 if (seh_nop_p)
25729 return "nop";
25731 return "";
25734 /* Clear stack slot assignments remembered from previous functions.
25735 This is called from INIT_EXPANDERS once before RTL is emitted for each
25736 function. */
25738 static struct machine_function *
25739 ix86_init_machine_status (void)
25741 struct machine_function *f;
25743 f = ggc_cleared_alloc<machine_function> ();
25744 f->use_fast_prologue_epilogue_nregs = -1;
25745 f->call_abi = ix86_abi;
25747 return f;
25750 /* Return a MEM corresponding to a stack slot with mode MODE.
25751 Allocate a new slot if necessary.
25753 The RTL for a function can have several slots available: N is
25754 which slot to use. */
25757 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25759 struct stack_local_entry *s;
25761 gcc_assert (n < MAX_386_STACK_LOCALS);
25763 for (s = ix86_stack_locals; s; s = s->next)
25764 if (s->mode == mode && s->n == n)
25765 return validize_mem (copy_rtx (s->rtl));
25767 s = ggc_alloc<stack_local_entry> ();
25768 s->n = n;
25769 s->mode = mode;
25770 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25772 s->next = ix86_stack_locals;
25773 ix86_stack_locals = s;
25774 return validize_mem (copy_rtx (s->rtl));
25777 static void
25778 ix86_instantiate_decls (void)
25780 struct stack_local_entry *s;
25782 for (s = ix86_stack_locals; s; s = s->next)
25783 if (s->rtl != NULL_RTX)
25784 instantiate_decl_rtl (s->rtl);
25787 /* Check whether x86 address PARTS is a pc-relative address. */
25789 static bool
25790 rip_relative_addr_p (struct ix86_address *parts)
25792 rtx base, index, disp;
25794 base = parts->base;
25795 index = parts->index;
25796 disp = parts->disp;
25798 if (disp && !base && !index)
25800 if (TARGET_64BIT)
25802 rtx symbol = disp;
25804 if (GET_CODE (disp) == CONST)
25805 symbol = XEXP (disp, 0);
25806 if (GET_CODE (symbol) == PLUS
25807 && CONST_INT_P (XEXP (symbol, 1)))
25808 symbol = XEXP (symbol, 0);
25810 if (GET_CODE (symbol) == LABEL_REF
25811 || (GET_CODE (symbol) == SYMBOL_REF
25812 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25813 || (GET_CODE (symbol) == UNSPEC
25814 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25815 || XINT (symbol, 1) == UNSPEC_PCREL
25816 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25817 return true;
25820 return false;
25823 /* Calculate the length of the memory address in the instruction encoding.
25824 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25825 or other prefixes. We never generate addr32 prefix for LEA insn. */
25828 memory_address_length (rtx addr, bool lea)
25830 struct ix86_address parts;
25831 rtx base, index, disp;
25832 int len;
25833 int ok;
25835 if (GET_CODE (addr) == PRE_DEC
25836 || GET_CODE (addr) == POST_INC
25837 || GET_CODE (addr) == PRE_MODIFY
25838 || GET_CODE (addr) == POST_MODIFY)
25839 return 0;
25841 ok = ix86_decompose_address (addr, &parts);
25842 gcc_assert (ok);
25844 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25846 /* If this is not a LEA instruction, add the length of the addr32 prefix. */
25847 if (TARGET_64BIT && !lea
25848 && (SImode_address_operand (addr, VOIDmode)
25849 || (parts.base && GET_MODE (parts.base) == SImode)
25850 || (parts.index && GET_MODE (parts.index) == SImode)))
25851 len++;
25853 base = parts.base;
25854 index = parts.index;
25855 disp = parts.disp;
25857 if (base && GET_CODE (base) == SUBREG)
25858 base = SUBREG_REG (base);
25859 if (index && GET_CODE (index) == SUBREG)
25860 index = SUBREG_REG (index);
25862 gcc_assert (base == NULL_RTX || REG_P (base));
25863 gcc_assert (index == NULL_RTX || REG_P (index));
25865 /* Rule of thumb:
25866 - esp as the base always wants an index,
25867 - ebp as the base always wants a displacement,
25868 - r12 as the base always wants an index,
25869 - r13 as the base always wants a displacement. */
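  /* Illustrative encoding detail (background assumption): with mod == 00,
     r/m == 100 does not mean (%esp) but "SIB byte follows", and r/m == 101
     does not mean (%ebp) but disp32 (disp32(%rip) in 64-bit mode).  So
     plain (%esp) needs a SIB byte and plain (%ebp) is emitted as 0(%ebp)
     with a disp8 -- one extra byte either way.  r12 and r13 inherit the
     same quirks because only the low three register bits appear in the
     ModRM byte.  */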
25871 /* Register Indirect. */
25872 if (base && !index && !disp)
25874 /* esp (for its index) and ebp (for its displacement) need
25875 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25876 code. */
25877 if (base == arg_pointer_rtx
25878 || base == frame_pointer_rtx
25879 || REGNO (base) == SP_REG
25880 || REGNO (base) == BP_REG
25881 || REGNO (base) == R12_REG
25882 || REGNO (base) == R13_REG)
25883 len++;
25886 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25887 is not disp32, but disp32(%rip), so for disp32
25888 SIB byte is needed, unless print_operand_address
25889 optimizes it into disp32(%rip) or (%rip) is implied
25890 by UNSPEC. */
25891 else if (disp && !base && !index)
25893 len += 4;
25894 if (rip_relative_addr_p (&parts))
25895 len++;
25897 else
25899 /* Find the length of the displacement constant. */
25900 if (disp)
25902 if (base && satisfies_constraint_K (disp))
25903 len += 1;
25904 else
25905 len += 4;
25907 /* ebp always wants a displacement. Similarly r13. */
25908 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25909 len++;
25911 /* An index requires the two-byte modrm form.... */
25912 if (index
25913 /* ...like esp (or r12), which always wants an index. */
25914 || base == arg_pointer_rtx
25915 || base == frame_pointer_rtx
25916 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25917 len++;
25920 return len;
25923 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25924 is set, expect that the insn has an 8-bit immediate alternative. */
25926 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25928 int len = 0;
25929 int i;
25930 extract_insn_cached (insn);
25931 for (i = recog_data.n_operands - 1; i >= 0; --i)
25932 if (CONSTANT_P (recog_data.operand[i]))
25934 enum attr_mode mode = get_attr_mode (insn);
25936 gcc_assert (!len);
25937 if (shortform && CONST_INT_P (recog_data.operand[i]))
25939 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25940 switch (mode)
25942 case MODE_QI:
25943 len = 1;
25944 continue;
25945 case MODE_HI:
25946 ival = trunc_int_for_mode (ival, HImode);
25947 break;
25948 case MODE_SI:
25949 ival = trunc_int_for_mode (ival, SImode);
25950 break;
25951 default:
25952 break;
25954 if (IN_RANGE (ival, -128, 127))
25956 len = 1;
25957 continue;
25960 switch (mode)
25962 case MODE_QI:
25963 len = 1;
25964 break;
25965 case MODE_HI:
25966 len = 2;
25967 break;
25968 case MODE_SI:
25969 len = 4;
25970 break;
25971 /* Immediates for DImode instructions are encoded
25972 as 32bit sign extended values. */
25973 case MODE_DI:
25974 len = 4;
25975 break;
25976 default:
25977 fatal_insn ("unknown insn mode", insn);
25980 return len;
25983 /* Compute default value for "length_address" attribute. */
25985 ix86_attr_length_address_default (rtx_insn *insn)
25987 int i;
25989 if (get_attr_type (insn) == TYPE_LEA)
25991 rtx set = PATTERN (insn), addr;
25993 if (GET_CODE (set) == PARALLEL)
25994 set = XVECEXP (set, 0, 0);
25996 gcc_assert (GET_CODE (set) == SET);
25998 addr = SET_SRC (set);
26000 return memory_address_length (addr, true);
26003 extract_insn_cached (insn);
26004 for (i = recog_data.n_operands - 1; i >= 0; --i)
26005 if (MEM_P (recog_data.operand[i]))
26007 constrain_operands_cached (insn, reload_completed);
26008 if (which_alternative != -1)
26010 const char *constraints = recog_data.constraints[i];
26011 int alt = which_alternative;
26013 while (*constraints == '=' || *constraints == '+')
26014 constraints++;
26015 while (alt-- > 0)
26016 while (*constraints++ != ',')
26018 /* Skip ignored operands. */
26019 if (*constraints == 'X')
26020 continue;
26022 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
26024 return 0;
26027 /* Compute default value for "length_vex" attribute. It includes
26028 2 or 3 byte VEX prefix and 1 opcode byte. */
26031 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
26032 bool has_vex_w)
26034 int i;
26036 /* Only the 0f opcode map can use the 2 byte VEX prefix, and the VEX.W
26037 bit requires the 3 byte VEX prefix. */
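  /* Background (illustrative): the 2-byte VEX prefix (0xC5) hard-codes the
     0F opcode map and omits the REX.X, REX.B and VEX.W bits, while the
     3-byte form (0xC4) carries an explicit map-select field plus those
     bits; hence any of them forces the longer encoding.  */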
26038 if (!has_0f_opcode || has_vex_w)
26039 return 3 + 1;
26041 /* We can always use 2 byte VEX prefix in 32bit. */
26042 if (!TARGET_64BIT)
26043 return 2 + 1;
26045 extract_insn_cached (insn);
26047 for (i = recog_data.n_operands - 1; i >= 0; --i)
26048 if (REG_P (recog_data.operand[i]))
26050 /* REX.W bit uses 3 byte VEX prefix. */
26051 if (GET_MODE (recog_data.operand[i]) == DImode
26052 && GENERAL_REG_P (recog_data.operand[i]))
26053 return 3 + 1;
26055 else
26057 /* REX.X or REX.B bits use 3 byte VEX prefix. */
26058 if (MEM_P (recog_data.operand[i])
26059 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
26060 return 3 + 1;
26063 return 2 + 1;
26066 /* Return the maximum number of instructions a cpu can issue. */
26068 static int
26069 ix86_issue_rate (void)
26071 switch (ix86_tune)
26073 case PROCESSOR_PENTIUM:
26074 case PROCESSOR_BONNELL:
26075 case PROCESSOR_SILVERMONT:
26076 case PROCESSOR_KNL:
26077 case PROCESSOR_INTEL:
26078 case PROCESSOR_K6:
26079 case PROCESSOR_BTVER2:
26080 case PROCESSOR_PENTIUM4:
26081 case PROCESSOR_NOCONA:
26082 return 2;
26084 case PROCESSOR_PENTIUMPRO:
26085 case PROCESSOR_ATHLON:
26086 case PROCESSOR_K8:
26087 case PROCESSOR_AMDFAM10:
26088 case PROCESSOR_GENERIC:
26089 case PROCESSOR_BTVER1:
26090 return 3;
26092 case PROCESSOR_BDVER1:
26093 case PROCESSOR_BDVER2:
26094 case PROCESSOR_BDVER3:
26095 case PROCESSOR_BDVER4:
26096 case PROCESSOR_CORE2:
26097 case PROCESSOR_NEHALEM:
26098 case PROCESSOR_SANDYBRIDGE:
26099 case PROCESSOR_HASWELL:
26100 return 4;
26102 default:
26103 return 1;
26107 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
26108 by DEP_INSN and nothing else set by DEP_INSN. */
26110 static bool
26111 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
26113 rtx set, set2;
26115 /* Simplify the test for uninteresting insns. */
26116 if (insn_type != TYPE_SETCC
26117 && insn_type != TYPE_ICMOV
26118 && insn_type != TYPE_FCMOV
26119 && insn_type != TYPE_IBR)
26120 return false;
26122 if ((set = single_set (dep_insn)) != 0)
26124 set = SET_DEST (set);
26125 set2 = NULL_RTX;
26127 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26128 && XVECLEN (PATTERN (dep_insn), 0) == 2
26129 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26130 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26132 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26133 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
26135 else
26136 return false;
26138 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26139 return false;
26141 /* This test is true if the dependent insn reads the flags but
26142 not any other potentially set register. */
26143 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26144 return false;
26146 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26147 return false;
26149 return true;
26152 /* Return true iff USE_INSN has a memory address with operands set by
26153 SET_INSN. */
26155 bool
26156 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26158 int i;
26159 extract_insn_cached (use_insn);
26160 for (i = recog_data.n_operands - 1; i >= 0; --i)
26161 if (MEM_P (recog_data.operand[i]))
26163 rtx addr = XEXP (recog_data.operand[i], 0);
26164 return modified_in_p (addr, set_insn) != 0;
26166 return false;
26169 /* Helper function for exact_store_load_dependency.
26170 Return true if addr is found in insn. */
26171 static bool
26172 exact_dependency_1 (rtx addr, rtx insn)
26174 enum rtx_code code;
26175 const char *format_ptr;
26176 int i, j;
26178 code = GET_CODE (insn);
26179 switch (code)
26181 case MEM:
26182 if (rtx_equal_p (addr, insn))
26183 return true;
26184 break;
26185 case REG:
26186 CASE_CONST_ANY:
26187 case SYMBOL_REF:
26188 case CODE_LABEL:
26189 case PC:
26190 case CC0:
26191 case EXPR_LIST:
26192 return false;
26193 default:
26194 break;
26197 format_ptr = GET_RTX_FORMAT (code);
26198 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26200 switch (*format_ptr++)
26202 case 'e':
26203 if (exact_dependency_1 (addr, XEXP (insn, i)))
26204 return true;
26205 break;
26206 case 'E':
26207 for (j = 0; j < XVECLEN (insn, i); j++)
26208 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26209 return true;
26210 break;
26213 return false;
26216 /* Return true if there exists exact dependency for store & load, i.e.
26217 the same memory address is used in them. */
26218 static bool
26219 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26221 rtx set1, set2;
26223 set1 = single_set (store);
26224 if (!set1)
26225 return false;
26226 if (!MEM_P (SET_DEST (set1)))
26227 return false;
26228 set2 = single_set (load);
26229 if (!set2)
26230 return false;
26231 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26232 return true;
26233 return false;
26236 static int
26237 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26239 enum attr_type insn_type, dep_insn_type;
26240 enum attr_memory memory;
26241 rtx set, set2;
26242 int dep_insn_code_number;
26244 /* Anti and output dependencies have zero cost on all CPUs. */
26245 if (REG_NOTE_KIND (link) != 0)
26246 return 0;
26248 dep_insn_code_number = recog_memoized (dep_insn);
26250 /* If we can't recognize the insns, we can't really do anything. */
26251 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26252 return cost;
26254 insn_type = get_attr_type (insn);
26255 dep_insn_type = get_attr_type (dep_insn);
26257 switch (ix86_tune)
26259 case PROCESSOR_PENTIUM:
26260 /* Address Generation Interlock adds a cycle of latency. */
26261 if (insn_type == TYPE_LEA)
26263 rtx addr = PATTERN (insn);
26265 if (GET_CODE (addr) == PARALLEL)
26266 addr = XVECEXP (addr, 0, 0);
26268 gcc_assert (GET_CODE (addr) == SET);
26270 addr = SET_SRC (addr);
26271 if (modified_in_p (addr, dep_insn))
26272 cost += 1;
26274 else if (ix86_agi_dependent (dep_insn, insn))
26275 cost += 1;
26277 /* ??? Compares pair with jump/setcc. */
26278 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26279 cost = 0;
26281 /* Floating point stores require value to be ready one cycle earlier. */
26282 if (insn_type == TYPE_FMOV
26283 && get_attr_memory (insn) == MEMORY_STORE
26284 && !ix86_agi_dependent (dep_insn, insn))
26285 cost += 1;
26286 break;
26288 case PROCESSOR_PENTIUMPRO:
26289 /* INT->FP conversion is expensive. */
26290 if (get_attr_fp_int_src (dep_insn))
26291 cost += 5;
26293 /* There is one cycle extra latency between an FP op and a store. */
26294 if (insn_type == TYPE_FMOV
26295 && (set = single_set (dep_insn)) != NULL_RTX
26296 && (set2 = single_set (insn)) != NULL_RTX
26297 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26298 && MEM_P (SET_DEST (set2)))
26299 cost += 1;
26301 memory = get_attr_memory (insn);
26303 /* Show the ability of the reorder buffer to hide the latency of a load
26304 by executing it in parallel with the previous instruction when the
26305 previous instruction is not needed to compute the address. */
26306 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26307 && !ix86_agi_dependent (dep_insn, insn))
26309 /* Claim moves to take one cycle, as the core can issue one load
26310 at a time and the next load can start a cycle later. */
26311 if (dep_insn_type == TYPE_IMOV
26312 || dep_insn_type == TYPE_FMOV)
26313 cost = 1;
26314 else if (cost > 1)
26315 cost--;
26317 break;
26319 case PROCESSOR_K6:
26320 /* The esp dependency is resolved before
26321 the instruction is really finished. */
26322 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26323 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26324 return 1;
26326 /* INT->FP conversion is expensive. */
26327 if (get_attr_fp_int_src (dep_insn))
26328 cost += 5;
26330 memory = get_attr_memory (insn);
26332 /* Show the ability of the reorder buffer to hide the latency of a load
26333 by executing it in parallel with the previous instruction when the
26334 previous instruction is not needed to compute the address. */
26335 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26336 && !ix86_agi_dependent (dep_insn, insn))
26338 /* Claim moves to take one cycle, as the core can issue one load
26339 at a time and the next load can start a cycle later. */
26340 if (dep_insn_type == TYPE_IMOV
26341 || dep_insn_type == TYPE_FMOV)
26342 cost = 1;
26343 else if (cost > 2)
26344 cost -= 2;
26345 else
26346 cost = 1;
26348 break;
26350 case PROCESSOR_AMDFAM10:
26351 case PROCESSOR_BDVER1:
26352 case PROCESSOR_BDVER2:
26353 case PROCESSOR_BDVER3:
26354 case PROCESSOR_BDVER4:
26355 case PROCESSOR_BTVER1:
26356 case PROCESSOR_BTVER2:
26357 case PROCESSOR_GENERIC:
26358 /* The stack engine allows push and pop instructions to execute in parallel. */
26359 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26360 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26361 return 0;
26362 /* FALLTHRU */
26364 case PROCESSOR_ATHLON:
26365 case PROCESSOR_K8:
26366 memory = get_attr_memory (insn);
26368 /* Show the ability of the reorder buffer to hide the latency of a load
26369 by executing it in parallel with the previous instruction when the
26370 previous instruction is not needed to compute the address. */
26371 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26372 && !ix86_agi_dependent (dep_insn, insn))
26374 enum attr_unit unit = get_attr_unit (insn);
26375 int loadcost = 3;
26377 /* Because of the difference between the length of integer and
26378 floating unit pipeline preparation stages, the memory operands
26379 for floating point are cheaper.
26381 ??? For Athlon the difference is most probably 2. */
26382 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26383 loadcost = 3;
26384 else
26385 loadcost = TARGET_ATHLON ? 2 : 0;
26387 if (cost >= loadcost)
26388 cost -= loadcost;
26389 else
26390 cost = 0;
26392 break;
26394 case PROCESSOR_CORE2:
26395 case PROCESSOR_NEHALEM:
26396 case PROCESSOR_SANDYBRIDGE:
26397 case PROCESSOR_HASWELL:
26398 /* The stack engine allows push and pop instructions to execute in parallel. */
26399 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26400 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26401 return 0;
26403 memory = get_attr_memory (insn);
26405 /* Show ability of reorder buffer to hide latency of load by executing
26406 in parallel with previous instruction in case
26407 previous instruction is not needed to compute the address. */
26408 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26409 && !ix86_agi_dependent (dep_insn, insn))
26411 if (cost >= 4)
26412 cost -= 4;
26413 else
26414 cost = 0;
26416 break;
26418 case PROCESSOR_SILVERMONT:
26419 case PROCESSOR_KNL:
26420 case PROCESSOR_INTEL:
26421 if (!reload_completed)
26422 return cost;
26424 /* Increase cost of integer loads. */
26425 memory = get_attr_memory (dep_insn);
26426 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26428 enum attr_unit unit = get_attr_unit (dep_insn);
26429 if (unit == UNIT_INTEGER && cost == 1)
26431 if (memory == MEMORY_LOAD)
26432 cost = 3;
26433 else
26435 /* Increase cost of ld/st for short int types only
26436 because of store forwarding issue. */
26437 rtx set = single_set (dep_insn);
26438 if (set && (GET_MODE (SET_DEST (set)) == QImode
26439 || GET_MODE (SET_DEST (set)) == HImode))
26441 /* Increase cost of store/load insn if exact
26442 dependence exists and it is load insn. */
26443 enum attr_memory insn_memory = get_attr_memory (insn);
26444 if (insn_memory == MEMORY_LOAD
26445 && exact_store_load_dependency (dep_insn, insn))
26446 cost = 3;
26452 default:
26453 break;
26456 return cost;
26459 /* How many alternative schedules to try. This should be as wide as the
26460 scheduling freedom in the DFA, but no wider. Making this value too
26461 large results in extra work for the scheduler. */
26463 static int
26464 ia32_multipass_dfa_lookahead (void)
26466 switch (ix86_tune)
26468 case PROCESSOR_PENTIUM:
26469 return 2;
26471 case PROCESSOR_PENTIUMPRO:
26472 case PROCESSOR_K6:
26473 return 1;
26475 case PROCESSOR_BDVER1:
26476 case PROCESSOR_BDVER2:
26477 case PROCESSOR_BDVER3:
26478 case PROCESSOR_BDVER4:
26479 /* We use lookahead value 4 for BD both before and after reload
26480 schedules.  The plan is to use value 8 at -O3. */
26481 return 4;
26483 case PROCESSOR_CORE2:
26484 case PROCESSOR_NEHALEM:
26485 case PROCESSOR_SANDYBRIDGE:
26486 case PROCESSOR_HASWELL:
26487 case PROCESSOR_BONNELL:
26488 case PROCESSOR_SILVERMONT:
26489 case PROCESSOR_KNL:
26490 case PROCESSOR_INTEL:
26491 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26492 as the number of instructions that can be executed in a cycle, i.e.,
26493 issue_rate. I wonder why tuning for many CPUs does not do this. */
26494 if (reload_completed)
26495 return ix86_issue_rate ();
26496 /* Don't use lookahead for pre-reload schedule to save compile time. */
26497 return 0;
26499 default:
26500 return 0;
26504 /* Return true if target platform supports macro-fusion. */
26506 static bool
26507 ix86_macro_fusion_p ()
26509 return TARGET_FUSE_CMP_AND_BRANCH;
26512 /* Check whether the current microarchitecture supports macro fusion
26513 for insn pair "CONDGEN + CONDJMP". Refer to
26514 "Intel Architectures Optimization Reference Manual". */
26516 static bool
26517 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26519 rtx src, dest;
26520 enum rtx_code ccode;
26521 rtx compare_set = NULL_RTX, test_if, cond;
26522 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26524 if (!any_condjump_p (condjmp))
26525 return false;
26527 if (get_attr_type (condgen) != TYPE_TEST
26528 && get_attr_type (condgen) != TYPE_ICMP
26529 && get_attr_type (condgen) != TYPE_INCDEC
26530 && get_attr_type (condgen) != TYPE_ALU)
26531 return false;
26533 compare_set = single_set (condgen);
26534 if (compare_set == NULL_RTX
26535 && !TARGET_FUSE_ALU_AND_BRANCH)
26536 return false;
26538 if (compare_set == NULL_RTX)
26540 int i;
26541 rtx pat = PATTERN (condgen);
26542 for (i = 0; i < XVECLEN (pat, 0); i++)
26543 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26545 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26546 if (GET_CODE (set_src) == COMPARE)
26547 compare_set = XVECEXP (pat, 0, i);
26548 else
26549 alu_set = XVECEXP (pat, 0, i);
26552 if (compare_set == NULL_RTX)
26553 return false;
26554 src = SET_SRC (compare_set);
26555 if (GET_CODE (src) != COMPARE)
26556 return false;
26558 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26559 supported. */
26560 if ((MEM_P (XEXP (src, 0))
26561 && CONST_INT_P (XEXP (src, 1)))
26562 || (MEM_P (XEXP (src, 1))
26563 && CONST_INT_P (XEXP (src, 0))))
26564 return false;
26566 /* No fusion for RIP-relative address. */
26567 if (MEM_P (XEXP (src, 0)))
26568 addr = XEXP (XEXP (src, 0), 0);
26569 else if (MEM_P (XEXP (src, 1)))
26570 addr = XEXP (XEXP (src, 1), 0);
26572 if (addr) {
26573 ix86_address parts;
26574 int ok = ix86_decompose_address (addr, &parts);
26575 gcc_assert (ok);
26577 if (rip_relative_addr_p (&parts))
26578 return false;
26581 test_if = SET_SRC (pc_set (condjmp));
26582 cond = XEXP (test_if, 0);
26583 ccode = GET_CODE (cond);
26584 /* Check whether the conditional jump uses the sign or overflow flags. */
26585 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26586 && (ccode == GE
26587 || ccode == GT
26588 || ccode == LE
26589 || ccode == LT))
26590 return false;
26592 /* Return true for TYPE_TEST and TYPE_ICMP. */
26593 if (get_attr_type (condgen) == TYPE_TEST
26594 || get_attr_type (condgen) == TYPE_ICMP)
26595 return true;
26597 /* The remaining case is macro-fusion for alu + jmp. */
26598 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26599 return false;
26601 /* No fusion for alu op with memory destination operand. */
26602 dest = SET_DEST (alu_set);
26603 if (MEM_P (dest))
26604 return false;
26606 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26607 supported. */
26608 if (get_attr_type (condgen) == TYPE_INCDEC
26609 && (ccode == GEU
26610 || ccode == GTU
26611 || ccode == LEU
26612 || ccode == LTU))
26613 return false;
26615 return true;
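/* Purely illustrative examples (not generated by this function), sketching
   which CONDGEN + CONDJMP pairs the checks above accept, assuming the
   relevant TARGET_FUSE_* tuning flags are enabled:

     cmpl  $1, %eax     ; je  .L1   fusible (TYPE_ICMP + conditional jump)
     testl %eax, %eax   ; jne .L1   fusible (TYPE_TEST)
     cmpl  $1, (%rdi)   ; je  .L1   rejected: MEM-IMM compare
     cmpl  %eax, x(%rip); je  .L1   rejected: RIP-relative address
     decl  %eax         ; jae .L1   rejected: inc/dec + unsigned condition
     addl  %ebx, %eax   ; jne .L1   fusible only with TARGET_FUSE_ALU_AND_BRANCH
                                    and a register (non-memory) destination.  */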
26618 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
26619 execution. It is applied if
26620 (1) an IMUL instruction is at the top of the ready list;
26621 (2) there is exactly one producer of an independent IMUL instruction
26622 in the ready list.
26623 Return the index of the IMUL producer if it was found, and -1 otherwise. */
26624 static int
26625 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26627 rtx_insn *insn;
26628 rtx set, insn1, insn2;
26629 sd_iterator_def sd_it;
26630 dep_t dep;
26631 int index = -1;
26632 int i;
26634 if (!TARGET_BONNELL)
26635 return index;
26637 /* Check that IMUL instruction is on the top of ready list. */
26638 insn = ready[n_ready - 1];
26639 set = single_set (insn);
26640 if (!set)
26641 return index;
26642 if (!(GET_CODE (SET_SRC (set)) == MULT
26643 && GET_MODE (SET_SRC (set)) == SImode))
26644 return index;
26646 /* Search for producer of independent IMUL instruction. */
26647 for (i = n_ready - 2; i >= 0; i--)
26649 insn = ready[i];
26650 if (!NONDEBUG_INSN_P (insn))
26651 continue;
26652 /* Skip IMUL instruction. */
26653 insn2 = PATTERN (insn);
26654 if (GET_CODE (insn2) == PARALLEL)
26655 insn2 = XVECEXP (insn2, 0, 0);
26656 if (GET_CODE (insn2) == SET
26657 && GET_CODE (SET_SRC (insn2)) == MULT
26658 && GET_MODE (SET_SRC (insn2)) == SImode)
26659 continue;
26661 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26663 rtx con;
26664 con = DEP_CON (dep);
26665 if (!NONDEBUG_INSN_P (con))
26666 continue;
26667 insn1 = PATTERN (con);
26668 if (GET_CODE (insn1) == PARALLEL)
26669 insn1 = XVECEXP (insn1, 0, 0);
26671 if (GET_CODE (insn1) == SET
26672 && GET_CODE (SET_SRC (insn1)) == MULT
26673 && GET_MODE (SET_SRC (insn1)) == SImode)
26675 sd_iterator_def sd_it1;
26676 dep_t dep1;
26677 /* Check that INSN is the only instruction the IMUL depends on. */
26678 index = i;
26679 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26681 rtx pro;
26682 pro = DEP_PRO (dep1);
26683 if (!NONDEBUG_INSN_P (pro))
26684 continue;
26685 if (pro != insn)
26686 index = -1;
26688 if (index >= 0)
26689 break;
26692 if (index >= 0)
26693 break;
26695 return index;
26698 /* Try to find the best candidate for the top of the ready list if two insns
26699 have the same priority - the candidate is best if the insns it depends on
26700 were scheduled earlier.  Applied for Silvermont only.
26701 Return true if the top 2 insns must be interchanged. */
26702 static bool
26703 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26705 rtx_insn *top = ready[n_ready - 1];
26706 rtx_insn *next = ready[n_ready - 2];
26707 rtx set;
26708 sd_iterator_def sd_it;
26709 dep_t dep;
26710 int clock1 = -1;
26711 int clock2 = -1;
26712 #define INSN_TICK(INSN) (HID (INSN)->tick)
26714 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26715 return false;
26717 if (!NONDEBUG_INSN_P (top))
26718 return false;
26719 if (!NONJUMP_INSN_P (top))
26720 return false;
26721 if (!NONDEBUG_INSN_P (next))
26722 return false;
26723 if (!NONJUMP_INSN_P (next))
26724 return false;
26725 set = single_set (top);
26726 if (!set)
26727 return false;
26728 set = single_set (next);
26729 if (!set)
26730 return false;
26732 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26734 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26735 return false;
26736 /* Determine the winner more precisely. */
26737 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26739 rtx pro;
26740 pro = DEP_PRO (dep);
26741 if (!NONDEBUG_INSN_P (pro))
26742 continue;
26743 if (INSN_TICK (pro) > clock1)
26744 clock1 = INSN_TICK (pro);
26746 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26748 rtx pro;
26749 pro = DEP_PRO (dep);
26750 if (!NONDEBUG_INSN_P (pro))
26751 continue;
26752 if (INSN_TICK (pro) > clock2)
26753 clock2 = INSN_TICK (pro);
26756 if (clock1 == clock2)
26758 /* Determine the winner - a load must win. */
26759 enum attr_memory memory1, memory2;
26760 memory1 = get_attr_memory (top);
26761 memory2 = get_attr_memory (next);
26762 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26763 return true;
26765 return (bool) (clock2 < clock1);
26767 return false;
26768 #undef INSN_TICK
26771 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26772 Return the issue rate. */
26773 static int
26774 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26775 int *pn_ready, int clock_var)
26777 int issue_rate = -1;
26778 int n_ready = *pn_ready;
26779 int i;
26780 rtx_insn *insn;
26781 int index = -1;
26783 /* Set up issue rate. */
26784 issue_rate = ix86_issue_rate ();
26786 /* Do reordering for BONNELL/SILVERMONT only. */
26787 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26788 return issue_rate;
26790 /* Nothing to do if ready list contains only 1 instruction. */
26791 if (n_ready <= 1)
26792 return issue_rate;
26794 /* Do reordering for the post-reload scheduler only. */
26795 if (!reload_completed)
26796 return issue_rate;
26798 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26800 if (sched_verbose > 1)
26801 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26802 INSN_UID (ready[index]));
26804 /* Put IMUL producer (ready[index]) at the top of ready list. */
26805 insn = ready[index];
26806 for (i = index; i < n_ready - 1; i++)
26807 ready[i] = ready[i + 1];
26808 ready[n_ready - 1] = insn;
26809 return issue_rate;
26812 /* Skip selective scheduling since HID is not populated in it. */
26813 if (clock_var != 0
26814 && !sel_sched_p ()
26815 && swap_top_of_ready_list (ready, n_ready))
26817 if (sched_verbose > 1)
26818 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26819 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26820 /* Swap 2 top elements of ready list. */
26821 insn = ready[n_ready - 1];
26822 ready[n_ready - 1] = ready[n_ready - 2];
26823 ready[n_ready - 2] = insn;
26825 return issue_rate;
26828 static bool
26829 ix86_class_likely_spilled_p (reg_class_t);
26831 /* Return true if the lhs of INSN is a HW function argument register, and set
26832 IS_SPILLED to true if it is a likely-spilled HW register. */
26833 static bool
26834 insn_is_function_arg (rtx insn, bool* is_spilled)
26836 rtx dst;
26838 if (!NONDEBUG_INSN_P (insn))
26839 return false;
26840 /* Call instructions are not movable; ignore them. */
26841 if (CALL_P (insn))
26842 return false;
26843 insn = PATTERN (insn);
26844 if (GET_CODE (insn) == PARALLEL)
26845 insn = XVECEXP (insn, 0, 0);
26846 if (GET_CODE (insn) != SET)
26847 return false;
26848 dst = SET_DEST (insn);
26849 if (REG_P (dst) && HARD_REGISTER_P (dst)
26850 && ix86_function_arg_regno_p (REGNO (dst)))
26852 /* Is it a likely-spilled HW register? */
26853 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26854 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26855 *is_spilled = true;
26856 return true;
26858 return false;
26861 /* Add output dependencies for a chain of adjacent function arguments, but only
26862 if there is a move to a likely-spilled HW register.  Return the first argument
26863 if at least one dependence was added, or NULL otherwise. */
26864 static rtx_insn *
26865 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26867 rtx_insn *insn;
26868 rtx_insn *last = call;
26869 rtx_insn *first_arg = NULL;
26870 bool is_spilled = false;
26872 head = PREV_INSN (head);
26874 /* Find the argument-passing instruction nearest to the call. */
26875 while (true)
26877 last = PREV_INSN (last);
26878 if (last == head)
26879 return NULL;
26880 if (!NONDEBUG_INSN_P (last))
26881 continue;
26882 if (insn_is_function_arg (last, &is_spilled))
26883 break;
26884 return NULL;
26887 first_arg = last;
26888 while (true)
26890 insn = PREV_INSN (last);
26891 if (!INSN_P (insn))
26892 break;
26893 if (insn == head)
26894 break;
26895 if (!NONDEBUG_INSN_P (insn))
26897 last = insn;
26898 continue;
26900 if (insn_is_function_arg (insn, &is_spilled))
26902 /* Add an output dependence between two function arguments if the chain
26903 of output arguments contains likely-spilled HW registers. */
26904 if (is_spilled)
26905 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26906 first_arg = last = insn;
26908 else
26909 break;
26911 if (!is_spilled)
26912 return NULL;
26913 return first_arg;
26916 /* Add output or anti dependency from insn to first_arg to restrict its code
26917 motion. */
26918 static void
26919 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26921 rtx set;
26922 rtx tmp;
26924 /* Add anti dependencies for bounds stores. */
26925 if (INSN_P (insn)
26926 && GET_CODE (PATTERN (insn)) == PARALLEL
26927 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
26928 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
26930 add_dependence (first_arg, insn, REG_DEP_ANTI);
26931 return;
26934 set = single_set (insn);
26935 if (!set)
26936 return;
26937 tmp = SET_DEST (set);
26938 if (REG_P (tmp))
26940 /* Add output dependency to the first function argument. */
26941 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26942 return;
26944 /* Add anti dependency. */
26945 add_dependence (first_arg, insn, REG_DEP_ANTI);
26948 /* Avoid cross-block motion of a function argument by adding a dependency
26949 from the first non-jump instruction in BB. */
26950 static void
26951 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26953 rtx_insn *insn = BB_END (bb);
26955 while (insn)
26957 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26959 rtx set = single_set (insn);
26960 if (set)
26962 avoid_func_arg_motion (arg, insn);
26963 return;
26966 if (insn == BB_HEAD (bb))
26967 return;
26968 insn = PREV_INSN (insn);
26972 /* Hook for pre-reload schedule - avoid motion of function arguments
26973 passed in likely spilled HW registers. */
26974 static void
26975 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26977 rtx_insn *insn;
26978 rtx_insn *first_arg = NULL;
26979 if (reload_completed)
26980 return;
26981 while (head != tail && DEBUG_INSN_P (head))
26982 head = NEXT_INSN (head);
26983 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26984 if (INSN_P (insn) && CALL_P (insn))
26986 first_arg = add_parameter_dependencies (insn, head);
26987 if (first_arg)
26989 /* Add a dependee for the first argument to predecessors, but only
26990 if the region contains more than one block. */
26991 basic_block bb = BLOCK_FOR_INSN (insn);
26992 int rgn = CONTAINING_RGN (bb->index);
26993 int nr_blks = RGN_NR_BLOCKS (rgn);
26994 /* Skip trivial regions and region head blocks that can have
26995 predecessors outside of region. */
26996 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26998 edge e;
26999 edge_iterator ei;
27001 /* Regions are SCCs with the exception of selective
27002 scheduling with pipelining of outer blocks enabled.
27003 So also check that immediate predecessors of a non-head
27004 block are in the same region. */
27005 FOR_EACH_EDGE (e, ei, bb->preds)
27007 /* Avoid creating loop-carried dependencies by using
27008 the topological ordering in the region. */
27009 if (rgn == CONTAINING_RGN (e->src->index)
27010 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
27011 add_dependee_for_func_arg (first_arg, e->src);
27014 insn = first_arg;
27015 if (insn == head)
27016 break;
27019 else if (first_arg)
27020 avoid_func_arg_motion (first_arg, insn);
27023 /* Hook for the pre-reload scheduler - set the priority of moves from likely-spilled
27024 HW registers to the maximum, to schedule them as soon as possible. These are
27025 moves from function argument registers at the top of the function entry
27026 and moves from function return value registers after call. */
27027 static int
27028 ix86_adjust_priority (rtx_insn *insn, int priority)
27030 rtx set;
27032 if (reload_completed)
27033 return priority;
27035 if (!NONDEBUG_INSN_P (insn))
27036 return priority;
27038 set = single_set (insn);
27039 if (set)
27041 rtx tmp = SET_SRC (set);
27042 if (REG_P (tmp)
27043 && HARD_REGISTER_P (tmp)
27044 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
27045 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
27046 return current_sched_info->sched_max_insns_priority;
27049 return priority;
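/* A hypothetical pre-reload example (not emitted by this hook; the pseudo
   number 90 is made up for illustration): a copy of an incoming argument
   register such as

     (set (reg:SI 90) (reg:SI 5 di))

   has a likely-spilled hard register as its source, so it is bumped to
   current_sched_info->sched_max_insns_priority, while an ordinary move
   between pseudos keeps the priority the scheduler already computed.  */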
27052 /* Model decoder of Core 2/i7.
27053 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
27054 track the instruction fetch block boundaries and make sure that long
27055 (9+ bytes) instructions are assigned to D0. */
27057 /* Maximum length of an insn that can be handled by
27058 a secondary decoder unit. '8' for Core 2/i7. */
27059 static int core2i7_secondary_decoder_max_insn_size;
27061 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
27062 '16' for Core 2/i7. */
27063 static int core2i7_ifetch_block_size;
27065 /* Maximum number of instructions decoder can handle per cycle.
27066 '6' for Core 2/i7. */
27067 static int core2i7_ifetch_block_max_insns;
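/* A rough worked example of the model (sketch only, using the Core 2/i7
   numbers installed in ix86_sched_init_global below): with a 16-byte ifetch
   block and at most 6 insns per cycle, three 5-byte insns plus a 2-byte insn
   need 17 bytes, so the fourth insn is deferred to the next block; and a
   9-byte insn exceeds the 8-byte secondary-decoder limit, so it can only be
   issued as the first insn of a cycle (decoder D0).  */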
27069 typedef struct ix86_first_cycle_multipass_data_ *
27070 ix86_first_cycle_multipass_data_t;
27071 typedef const struct ix86_first_cycle_multipass_data_ *
27072 const_ix86_first_cycle_multipass_data_t;
27074 /* A variable to store target state across calls to max_issue within
27075 one cycle. */
27076 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
27077 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
27079 /* Initialize DATA. */
27080 static void
27081 core2i7_first_cycle_multipass_init (void *_data)
27083 ix86_first_cycle_multipass_data_t data
27084 = (ix86_first_cycle_multipass_data_t) _data;
27086 data->ifetch_block_len = 0;
27087 data->ifetch_block_n_insns = 0;
27088 data->ready_try_change = NULL;
27089 data->ready_try_change_size = 0;
27092 /* Advancing the cycle; reset ifetch block counts. */
27093 static void
27094 core2i7_dfa_post_advance_cycle (void)
27096 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
27098 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27100 data->ifetch_block_len = 0;
27101 data->ifetch_block_n_insns = 0;
27104 static int min_insn_size (rtx_insn *);
27106 /* Filter out insns from ready_try that the core will not be able to issue
27107 on current cycle due to decoder. */
27108 static void
27109 core2i7_first_cycle_multipass_filter_ready_try
27110 (const_ix86_first_cycle_multipass_data_t data,
27111 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
27113 while (n_ready--)
27115 rtx_insn *insn;
27116 int insn_size;
27118 if (ready_try[n_ready])
27119 continue;
27121 insn = get_ready_element (n_ready);
27122 insn_size = min_insn_size (insn);
27124 if (/* If this is too long an insn for a secondary decoder ... */
27125 (!first_cycle_insn_p
27126 && insn_size > core2i7_secondary_decoder_max_insn_size)
27127 /* ... or it would not fit into the ifetch block ... */
27128 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
27129 /* ... or the decoder is full already ... */
27130 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
27131 /* ... mask the insn out. */
27133 ready_try[n_ready] = 1;
27135 if (data->ready_try_change)
27136 bitmap_set_bit (data->ready_try_change, n_ready);
27141 /* Prepare for a new round of multipass lookahead scheduling. */
27142 static void
27143 core2i7_first_cycle_multipass_begin (void *_data,
27144 signed char *ready_try, int n_ready,
27145 bool first_cycle_insn_p)
27147 ix86_first_cycle_multipass_data_t data
27148 = (ix86_first_cycle_multipass_data_t) _data;
27149 const_ix86_first_cycle_multipass_data_t prev_data
27150 = ix86_first_cycle_multipass_data;
27152 /* Restore the state from the end of the previous round. */
27153 data->ifetch_block_len = prev_data->ifetch_block_len;
27154 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27156 /* Filter instructions that cannot be issued on current cycle due to
27157 decoder restrictions. */
27158 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27159 first_cycle_insn_p);
27162 /* INSN is being issued in the current solution. Account for its impact on
27163 the decoder model. */
27164 static void
27165 core2i7_first_cycle_multipass_issue (void *_data,
27166 signed char *ready_try, int n_ready,
27167 rtx_insn *insn, const void *_prev_data)
27169 ix86_first_cycle_multipass_data_t data
27170 = (ix86_first_cycle_multipass_data_t) _data;
27171 const_ix86_first_cycle_multipass_data_t prev_data
27172 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27174 int insn_size = min_insn_size (insn);
27176 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27177 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27178 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27179 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27181 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27182 if (!data->ready_try_change)
27184 data->ready_try_change = sbitmap_alloc (n_ready);
27185 data->ready_try_change_size = n_ready;
27187 else if (data->ready_try_change_size < n_ready)
27189 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27190 n_ready, 0);
27191 data->ready_try_change_size = n_ready;
27193 bitmap_clear (data->ready_try_change);
27195 /* Filter out insns from ready_try that the core will not be able to issue
27196 on current cycle due to decoder. */
27197 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27198 false);
27201 /* Revert the effect on ready_try. */
27202 static void
27203 core2i7_first_cycle_multipass_backtrack (const void *_data,
27204 signed char *ready_try,
27205 int n_ready ATTRIBUTE_UNUSED)
27207 const_ix86_first_cycle_multipass_data_t data
27208 = (const_ix86_first_cycle_multipass_data_t) _data;
27209 unsigned int i = 0;
27210 sbitmap_iterator sbi;
27212 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27213 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27215 ready_try[i] = 0;
27219 /* Save the result of multipass lookahead scheduling for the next round. */
27220 static void
27221 core2i7_first_cycle_multipass_end (const void *_data)
27223 const_ix86_first_cycle_multipass_data_t data
27224 = (const_ix86_first_cycle_multipass_data_t) _data;
27225 ix86_first_cycle_multipass_data_t next_data
27226 = ix86_first_cycle_multipass_data;
27228 if (data != NULL)
27230 next_data->ifetch_block_len = data->ifetch_block_len;
27231 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27235 /* Deallocate target data. */
27236 static void
27237 core2i7_first_cycle_multipass_fini (void *_data)
27239 ix86_first_cycle_multipass_data_t data
27240 = (ix86_first_cycle_multipass_data_t) _data;
27242 if (data->ready_try_change)
27244 sbitmap_free (data->ready_try_change);
27245 data->ready_try_change = NULL;
27246 data->ready_try_change_size = 0;
27250 /* Prepare for scheduling pass. */
27251 static void
27252 ix86_sched_init_global (FILE *, int, int)
27254 /* Install scheduling hooks for current CPU. Some of these hooks are used
27255 in time-critical parts of the scheduler, so we only set them up when
27256 they are actually used. */
27257 switch (ix86_tune)
27259 case PROCESSOR_CORE2:
27260 case PROCESSOR_NEHALEM:
27261 case PROCESSOR_SANDYBRIDGE:
27262 case PROCESSOR_HASWELL:
27263 /* Do not perform multipass scheduling for pre-reload schedule
27264 to save compile time. */
27265 if (reload_completed)
27267 targetm.sched.dfa_post_advance_cycle
27268 = core2i7_dfa_post_advance_cycle;
27269 targetm.sched.first_cycle_multipass_init
27270 = core2i7_first_cycle_multipass_init;
27271 targetm.sched.first_cycle_multipass_begin
27272 = core2i7_first_cycle_multipass_begin;
27273 targetm.sched.first_cycle_multipass_issue
27274 = core2i7_first_cycle_multipass_issue;
27275 targetm.sched.first_cycle_multipass_backtrack
27276 = core2i7_first_cycle_multipass_backtrack;
27277 targetm.sched.first_cycle_multipass_end
27278 = core2i7_first_cycle_multipass_end;
27279 targetm.sched.first_cycle_multipass_fini
27280 = core2i7_first_cycle_multipass_fini;
27282 /* Set decoder parameters. */
27283 core2i7_secondary_decoder_max_insn_size = 8;
27284 core2i7_ifetch_block_size = 16;
27285 core2i7_ifetch_block_max_insns = 6;
27286 break;
27288 /* ... Fall through ... */
27289 default:
27290 targetm.sched.dfa_post_advance_cycle = NULL;
27291 targetm.sched.first_cycle_multipass_init = NULL;
27292 targetm.sched.first_cycle_multipass_begin = NULL;
27293 targetm.sched.first_cycle_multipass_issue = NULL;
27294 targetm.sched.first_cycle_multipass_backtrack = NULL;
27295 targetm.sched.first_cycle_multipass_end = NULL;
27296 targetm.sched.first_cycle_multipass_fini = NULL;
27297 break;
27302 /* Compute the alignment given to a constant that is being placed in memory.
27303 EXP is the constant and ALIGN is the alignment that the object would
27304 ordinarily have.
27305 The value of this function is used instead of that alignment to align
27306 the object. */
27309 ix86_constant_alignment (tree exp, int align)
27311 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27312 || TREE_CODE (exp) == INTEGER_CST)
27314 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27315 return 64;
27316 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27317 return 128;
27319 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27320 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27321 return BITS_PER_WORD;
27323 return align;
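/* A minimal illustration (sketch only) of the rules above: a DFmode constant
   forced to the constant pool is aligned to at least 64 bits, a 128-bit
   vector constant to 128 bits, and, when not optimizing for size, a string
   literal of 31 characters or more to BITS_PER_WORD, e.g.

     const char *p = "this string literal is well over thirty-one characters";  */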
27326 /* Compute the alignment for a static variable.
27327 TYPE is the data type, and ALIGN is the alignment that
27328 the object would ordinarily have. The value of this function is used
27329 instead of that alignment to align the object. */
27332 ix86_data_alignment (tree type, int align, bool opt)
27334 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27335 for symbols from other compilation units or symbols that don't need
27336 to bind locally. In order to preserve some ABI compatibility with
27337 those compilers, ensure we don't decrease alignment from what we
27338 used to assume. */
27340 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27342 /* A data structure equal to or greater than the size of a cache line
27343 (64 bytes in the Pentium 4 and other recent Intel processors, including
27344 processors based on the Intel Core microarchitecture) should be aligned
27345 so that its base address is a multiple of the cache line size. */
27347 int max_align
27348 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27350 if (max_align < BITS_PER_WORD)
27351 max_align = BITS_PER_WORD;
27353 switch (ix86_align_data_type)
27355 case ix86_align_data_type_abi: opt = false; break;
27356 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27357 case ix86_align_data_type_cacheline: break;
27360 if (opt
27361 && AGGREGATE_TYPE_P (type)
27362 && TYPE_SIZE (type)
27363 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27365 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27366 && align < max_align_compat)
27367 align = max_align_compat;
27368 if (wi::geu_p (TYPE_SIZE (type), max_align)
27369 && align < max_align)
27370 align = max_align;
27373 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
27374 to a 16-byte boundary. */
27375 if (TARGET_64BIT)
27377 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27378 && TYPE_SIZE (type)
27379 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27380 && wi::geu_p (TYPE_SIZE (type), 128)
27381 && align < 128)
27382 return 128;
27385 if (!opt)
27386 return align;
27388 if (TREE_CODE (type) == ARRAY_TYPE)
27390 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27391 return 64;
27392 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27393 return 128;
27395 else if (TREE_CODE (type) == COMPLEX_TYPE)
27398 if (TYPE_MODE (type) == DCmode && align < 64)
27399 return 64;
27400 if ((TYPE_MODE (type) == XCmode
27401 || TYPE_MODE (type) == TCmode) && align < 128)
27402 return 128;
27404 else if ((TREE_CODE (type) == RECORD_TYPE
27405 || TREE_CODE (type) == UNION_TYPE
27406 || TREE_CODE (type) == QUAL_UNION_TYPE)
27407 && TYPE_FIELDS (type))
27409 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27410 return 64;
27411 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27412 return 128;
27414 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27415 || TREE_CODE (type) == INTEGER_TYPE)
27417 if (TYPE_MODE (type) == DFmode && align < 64)
27418 return 64;
27419 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27420 return 128;
27423 return align;
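/* A minimal illustration with hypothetical declarations (actual results also
   depend on -malign-data and MAX_OFILE_ALIGNMENT):

     static double d;         // DFmode: at least 64-bit alignment
     static char buf[32];     // x86-64, >= 16 bytes: 16-byte alignment
     static char big[128];    // with opt, may be raised toward a cache line  */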
27426 /* Compute the alignment for a local variable or a stack slot. EXP is
27427 the data type or decl itself, MODE is the widest mode available and
27428 ALIGN is the alignment that the object would ordinarily have. The
27429 value of this macro is used instead of that alignment to align the
27430 object. */
27432 unsigned int
27433 ix86_local_alignment (tree exp, machine_mode mode,
27434 unsigned int align)
27436 tree type, decl;
27438 if (exp && DECL_P (exp))
27440 type = TREE_TYPE (exp);
27441 decl = exp;
27443 else
27445 type = exp;
27446 decl = NULL;
27449 /* Don't do dynamic stack realignment for long long objects with
27450 -mpreferred-stack-boundary=2. */
27451 if (!TARGET_64BIT
27452 && align == 64
27453 && ix86_preferred_stack_boundary < 64
27454 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27455 && (!type || !TYPE_USER_ALIGN (type))
27456 && (!decl || !DECL_USER_ALIGN (decl)))
27457 align = 32;
27459 /* If TYPE is NULL, we are allocating a stack slot for caller-save
27460 register in MODE. We will return the largest alignment of XF
27461 and DF. */
27462 if (!type)
27464 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27465 align = GET_MODE_ALIGNMENT (DFmode);
27466 return align;
27469 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
27470 to a 16-byte boundary.  The exact wording is:
27472 An array uses the same alignment as its elements, except that a local or
27473 global array variable of length at least 16 bytes or
27474 a C99 variable-length array variable always has alignment of at least 16 bytes.
27476 This was added to allow use of aligned SSE instructions on arrays. The
27477 rule is meant for static storage (where the compiler cannot do the analysis
27478 by itself). We follow it for automatic variables only when convenient.
27479 We fully control everything in the function being compiled, and functions
27480 from other units cannot rely on the alignment.
27482 Exclude the va_list type. It is the common case of a local array where
27483 we cannot benefit from the alignment.
27485 TODO: Probably one should optimize for size only when var is not escaping. */
27486 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27487 && TARGET_SSE)
27489 if (AGGREGATE_TYPE_P (type)
27490 && (va_list_type_node == NULL_TREE
27491 || (TYPE_MAIN_VARIANT (type)
27492 != TYPE_MAIN_VARIANT (va_list_type_node)))
27493 && TYPE_SIZE (type)
27494 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27495 && wi::geu_p (TYPE_SIZE (type), 16)
27496 && align < 128)
27497 return 128;
27499 if (TREE_CODE (type) == ARRAY_TYPE)
27501 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27502 return 64;
27503 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27504 return 128;
27506 else if (TREE_CODE (type) == COMPLEX_TYPE)
27508 if (TYPE_MODE (type) == DCmode && align < 64)
27509 return 64;
27510 if ((TYPE_MODE (type) == XCmode
27511 || TYPE_MODE (type) == TCmode) && align < 128)
27512 return 128;
27514 else if ((TREE_CODE (type) == RECORD_TYPE
27515 || TREE_CODE (type) == UNION_TYPE
27516 || TREE_CODE (type) == QUAL_UNION_TYPE)
27517 && TYPE_FIELDS (type))
27519 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27520 return 64;
27521 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27522 return 128;
27524 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27525 || TREE_CODE (type) == INTEGER_TYPE)
27528 if (TYPE_MODE (type) == DFmode && align < 64)
27529 return 64;
27530 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27531 return 128;
27533 return align;
27536 /* Compute the minimum required alignment for dynamic stack realignment
27537 purposes for a local variable, parameter or a stack slot. EXP is
27538 the data type or decl itself, MODE is its mode and ALIGN is the
27539 alignment that the object would ordinarily have. */
27541 unsigned int
27542 ix86_minimum_alignment (tree exp, machine_mode mode,
27543 unsigned int align)
27545 tree type, decl;
27547 if (exp && DECL_P (exp))
27549 type = TREE_TYPE (exp);
27550 decl = exp;
27552 else
27554 type = exp;
27555 decl = NULL;
27558 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27559 return align;
27561 /* Don't do dynamic stack realignment for long long objects with
27562 -mpreferred-stack-boundary=2. */
27563 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27564 && (!type || !TYPE_USER_ALIGN (type))
27565 && (!decl || !DECL_USER_ALIGN (decl)))
27566 return 32;
27568 return align;
27571 /* Find a location for the static chain incoming to a nested function.
27572 This is a register, unless all free registers are used by arguments. */
27574 static rtx
27575 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27577 unsigned regno;
27579 /* While this function won't be called by the middle-end when a static
27580 chain isn't needed, it's also used throughout the backend so it's
27581 easiest to keep this check centralized. */
27582 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27583 return NULL;
27585 if (TARGET_64BIT)
27587 /* We always use R10 in 64-bit mode. */
27588 regno = R10_REG;
27590 else
27592 const_tree fntype, fndecl;
27593 unsigned int ccvt;
27595 /* By default in 32-bit mode we use ECX to pass the static chain. */
27596 regno = CX_REG;
27598 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27600 fntype = TREE_TYPE (fndecl_or_type);
27601 fndecl = fndecl_or_type;
27603 else
27605 fntype = fndecl_or_type;
27606 fndecl = NULL;
27609 ccvt = ix86_get_callcvt (fntype);
27610 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27612 /* Fastcall functions use ecx/edx for arguments, which leaves
27613 us with EAX for the static chain.
27614 Thiscall functions use ecx for arguments, which also
27615 leaves us with EAX for the static chain. */
27616 regno = AX_REG;
27618 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27620 /* Thiscall functions use ecx for arguments, which leaves
27621 us with EAX and EDX for the static chain.
27622 We use EAX for ABI compatibility. */
27623 regno = AX_REG;
27625 else if (ix86_function_regparm (fntype, fndecl) == 3)
27627 /* For regparm 3, we have no free call-clobbered registers in
27628 which to store the static chain. In order to implement this,
27629 we have the trampoline push the static chain to the stack.
27630 However, we can't push a value below the return address when
27631 we call the nested function directly, so we have to use an
27632 alternate entry point. For this we use ESI, and have the
27633 alternate entry point push ESI, so that things appear the
27634 same once we're executing the nested function. */
27635 if (incoming_p)
27637 if (fndecl == current_function_decl)
27638 ix86_static_chain_on_stack = true;
27639 return gen_frame_mem (SImode,
27640 plus_constant (Pmode,
27641 arg_pointer_rtx, -8));
27643 regno = SI_REG;
27647 return gen_rtx_REG (Pmode, regno);
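/* In summary (an illustration of the choices made above):
     64-bit                         -> R10
     32-bit, default conventions    -> ECX
     32-bit, fastcall or thiscall   -> EAX
     32-bit, regparm(3)             -> no free register: the outgoing chain
                                       uses ESI and the incoming chain is read
                                       from the stack, as described above.  */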
27650 /* Emit RTL insns to initialize the variable parts of a trampoline.
27651 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27652 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27653 to be passed to the target function. */
27655 static void
27656 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27658 rtx mem, fnaddr;
27659 int opcode;
27660 int offset = 0;
27662 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27664 if (TARGET_64BIT)
27666 int size;
27668 /* Load the function address into r11. Try to load the address using
27669 the shorter movl instead of movabs. We may want to support
27670 movq for kernel mode, but the kernel does not use trampolines at
27671 the moment. FNADDR is a 32-bit address and may not be in
27672 DImode when ptr_mode == SImode. Always use movl in this
27673 case. */
27674 if (ptr_mode == SImode
27675 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27677 fnaddr = copy_addr_to_reg (fnaddr);
27679 mem = adjust_address (m_tramp, HImode, offset);
27680 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27682 mem = adjust_address (m_tramp, SImode, offset + 2);
27683 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27684 offset += 6;
27686 else
27688 mem = adjust_address (m_tramp, HImode, offset);
27689 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27691 mem = adjust_address (m_tramp, DImode, offset + 2);
27692 emit_move_insn (mem, fnaddr);
27693 offset += 10;
27696 /* Load static chain using movabs to r10. Use the shorter movl
27697 instead of movabs when ptr_mode == SImode. */
27698 if (ptr_mode == SImode)
27700 opcode = 0xba41;
27701 size = 6;
27703 else
27705 opcode = 0xba49;
27706 size = 10;
27709 mem = adjust_address (m_tramp, HImode, offset);
27710 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27712 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27713 emit_move_insn (mem, chain_value);
27714 offset += size;
27716 /* Jump to r11; the last (unused) byte is a nop, only there to
27717 pad the write out to a single 32-bit store. */
27718 mem = adjust_address (m_tramp, SImode, offset);
27719 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27720 offset += 4;
27722 else
27724 rtx disp, chain;
27726 /* Depending on the static chain location, either load a register
27727 with a constant, or push the constant to the stack. All of the
27728 instructions are the same size. */
27729 chain = ix86_static_chain (fndecl, true);
27730 if (REG_P (chain))
27732 switch (REGNO (chain))
27734 case AX_REG:
27735 opcode = 0xb8; break;
27736 case CX_REG:
27737 opcode = 0xb9; break;
27738 default:
27739 gcc_unreachable ();
27742 else
27743 opcode = 0x68;
27745 mem = adjust_address (m_tramp, QImode, offset);
27746 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27748 mem = adjust_address (m_tramp, SImode, offset + 1);
27749 emit_move_insn (mem, chain_value);
27750 offset += 5;
27752 mem = adjust_address (m_tramp, QImode, offset);
27753 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27755 mem = adjust_address (m_tramp, SImode, offset + 1);
27757 /* Compute offset from the end of the jmp to the target function.
27758 In the case in which the trampoline stores the static chain on
27759 the stack, we need to skip the first insn which pushes the
27760 (call-saved) register static chain; this push is 1 byte. */
27761 offset += 5;
27762 disp = expand_binop (SImode, sub_optab, fnaddr,
27763 plus_constant (Pmode, XEXP (m_tramp, 0),
27764 offset - (MEM_P (chain) ? 1 : 0)),
27765 NULL_RTX, 1, OPTAB_DIRECT);
27766 emit_move_insn (mem, disp);
27769 gcc_assert (offset <= TRAMPOLINE_SIZE);
27771 #ifdef HAVE_ENABLE_EXECUTE_STACK
27772 #ifdef CHECK_EXECUTE_STACK_ENABLED
27773 if (CHECK_EXECUTE_STACK_ENABLED)
27774 #endif
27775 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27776 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27777 #endif
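/* For reference, a sketch of the 64-bit trampoline laid out above when the
   full movabs forms are used (byte values taken from the constants in the
   code; <fnaddr> and <chain> stand for the 8-byte operands; 24 bytes total):

     49 bb <fnaddr>    movabs $fnaddr, %r11
     49 ba <chain>     movabs $chain,  %r10
     49 ff e3          jmp    *%r11
     90                nop (pads the final 32-bit store)  */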
27780 /* The following file contains several enumerations and data structures
27781 built from the definitions in i386-builtin-types.def. */
27783 #include "i386-builtin-types.inc"
27785 /* Table for the ix86 builtin non-function types. */
27786 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27788 /* Retrieve an element from the above table, building some of
27789 the types lazily. */
27791 static tree
27792 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27794 unsigned int index;
27795 tree type, itype;
27797 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27799 type = ix86_builtin_type_tab[(int) tcode];
27800 if (type != NULL)
27801 return type;
27803 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27804 if (tcode <= IX86_BT_LAST_VECT)
27806 machine_mode mode;
27808 index = tcode - IX86_BT_LAST_PRIM - 1;
27809 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27810 mode = ix86_builtin_type_vect_mode[index];
27812 type = build_vector_type_for_mode (itype, mode);
27814 else
27816 int quals;
27818 index = tcode - IX86_BT_LAST_VECT - 1;
27819 if (tcode <= IX86_BT_LAST_PTR)
27820 quals = TYPE_UNQUALIFIED;
27821 else
27822 quals = TYPE_QUAL_CONST;
27824 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27825 if (quals != TYPE_UNQUALIFIED)
27826 itype = build_qualified_type (itype, quals);
27828 type = build_pointer_type (itype);
27831 ix86_builtin_type_tab[(int) tcode] = type;
27832 return type;
27835 /* Table for the ix86 builtin function types. */
27836 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27838 /* Retrieve an element from the above table, building some of
27839 the types lazily. */
27841 static tree
27842 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27844 tree type;
27846 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27848 type = ix86_builtin_func_type_tab[(int) tcode];
27849 if (type != NULL)
27850 return type;
27852 if (tcode <= IX86_BT_LAST_FUNC)
27854 unsigned start = ix86_builtin_func_start[(int) tcode];
27855 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27856 tree rtype, atype, args = void_list_node;
27857 unsigned i;
27859 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27860 for (i = after - 1; i > start; --i)
27862 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27863 args = tree_cons (NULL, atype, args);
27866 type = build_function_type (rtype, args);
27868 else
27870 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27871 enum ix86_builtin_func_type icode;
27873 icode = ix86_builtin_func_alias_base[index];
27874 type = ix86_get_builtin_func_type (icode);
27877 ix86_builtin_func_type_tab[(int) tcode] = type;
27878 return type;
27882 /* Codes for all the SSE/MMX builtins. */
27883 enum ix86_builtins
27885 IX86_BUILTIN_ADDPS,
27886 IX86_BUILTIN_ADDSS,
27887 IX86_BUILTIN_DIVPS,
27888 IX86_BUILTIN_DIVSS,
27889 IX86_BUILTIN_MULPS,
27890 IX86_BUILTIN_MULSS,
27891 IX86_BUILTIN_SUBPS,
27892 IX86_BUILTIN_SUBSS,
27894 IX86_BUILTIN_CMPEQPS,
27895 IX86_BUILTIN_CMPLTPS,
27896 IX86_BUILTIN_CMPLEPS,
27897 IX86_BUILTIN_CMPGTPS,
27898 IX86_BUILTIN_CMPGEPS,
27899 IX86_BUILTIN_CMPNEQPS,
27900 IX86_BUILTIN_CMPNLTPS,
27901 IX86_BUILTIN_CMPNLEPS,
27902 IX86_BUILTIN_CMPNGTPS,
27903 IX86_BUILTIN_CMPNGEPS,
27904 IX86_BUILTIN_CMPORDPS,
27905 IX86_BUILTIN_CMPUNORDPS,
27906 IX86_BUILTIN_CMPEQSS,
27907 IX86_BUILTIN_CMPLTSS,
27908 IX86_BUILTIN_CMPLESS,
27909 IX86_BUILTIN_CMPNEQSS,
27910 IX86_BUILTIN_CMPNLTSS,
27911 IX86_BUILTIN_CMPNLESS,
27912 IX86_BUILTIN_CMPORDSS,
27913 IX86_BUILTIN_CMPUNORDSS,
27915 IX86_BUILTIN_COMIEQSS,
27916 IX86_BUILTIN_COMILTSS,
27917 IX86_BUILTIN_COMILESS,
27918 IX86_BUILTIN_COMIGTSS,
27919 IX86_BUILTIN_COMIGESS,
27920 IX86_BUILTIN_COMINEQSS,
27921 IX86_BUILTIN_UCOMIEQSS,
27922 IX86_BUILTIN_UCOMILTSS,
27923 IX86_BUILTIN_UCOMILESS,
27924 IX86_BUILTIN_UCOMIGTSS,
27925 IX86_BUILTIN_UCOMIGESS,
27926 IX86_BUILTIN_UCOMINEQSS,
27928 IX86_BUILTIN_CVTPI2PS,
27929 IX86_BUILTIN_CVTPS2PI,
27930 IX86_BUILTIN_CVTSI2SS,
27931 IX86_BUILTIN_CVTSI642SS,
27932 IX86_BUILTIN_CVTSS2SI,
27933 IX86_BUILTIN_CVTSS2SI64,
27934 IX86_BUILTIN_CVTTPS2PI,
27935 IX86_BUILTIN_CVTTSS2SI,
27936 IX86_BUILTIN_CVTTSS2SI64,
27938 IX86_BUILTIN_MAXPS,
27939 IX86_BUILTIN_MAXSS,
27940 IX86_BUILTIN_MINPS,
27941 IX86_BUILTIN_MINSS,
27943 IX86_BUILTIN_LOADUPS,
27944 IX86_BUILTIN_STOREUPS,
27945 IX86_BUILTIN_MOVSS,
27947 IX86_BUILTIN_MOVHLPS,
27948 IX86_BUILTIN_MOVLHPS,
27949 IX86_BUILTIN_LOADHPS,
27950 IX86_BUILTIN_LOADLPS,
27951 IX86_BUILTIN_STOREHPS,
27952 IX86_BUILTIN_STORELPS,
27954 IX86_BUILTIN_MASKMOVQ,
27955 IX86_BUILTIN_MOVMSKPS,
27956 IX86_BUILTIN_PMOVMSKB,
27958 IX86_BUILTIN_MOVNTPS,
27959 IX86_BUILTIN_MOVNTQ,
27961 IX86_BUILTIN_LOADDQU,
27962 IX86_BUILTIN_STOREDQU,
27964 IX86_BUILTIN_PACKSSWB,
27965 IX86_BUILTIN_PACKSSDW,
27966 IX86_BUILTIN_PACKUSWB,
27968 IX86_BUILTIN_PADDB,
27969 IX86_BUILTIN_PADDW,
27970 IX86_BUILTIN_PADDD,
27971 IX86_BUILTIN_PADDQ,
27972 IX86_BUILTIN_PADDSB,
27973 IX86_BUILTIN_PADDSW,
27974 IX86_BUILTIN_PADDUSB,
27975 IX86_BUILTIN_PADDUSW,
27976 IX86_BUILTIN_PSUBB,
27977 IX86_BUILTIN_PSUBW,
27978 IX86_BUILTIN_PSUBD,
27979 IX86_BUILTIN_PSUBQ,
27980 IX86_BUILTIN_PSUBSB,
27981 IX86_BUILTIN_PSUBSW,
27982 IX86_BUILTIN_PSUBUSB,
27983 IX86_BUILTIN_PSUBUSW,
27985 IX86_BUILTIN_PAND,
27986 IX86_BUILTIN_PANDN,
27987 IX86_BUILTIN_POR,
27988 IX86_BUILTIN_PXOR,
27990 IX86_BUILTIN_PAVGB,
27991 IX86_BUILTIN_PAVGW,
27993 IX86_BUILTIN_PCMPEQB,
27994 IX86_BUILTIN_PCMPEQW,
27995 IX86_BUILTIN_PCMPEQD,
27996 IX86_BUILTIN_PCMPGTB,
27997 IX86_BUILTIN_PCMPGTW,
27998 IX86_BUILTIN_PCMPGTD,
28000 IX86_BUILTIN_PMADDWD,
28002 IX86_BUILTIN_PMAXSW,
28003 IX86_BUILTIN_PMAXUB,
28004 IX86_BUILTIN_PMINSW,
28005 IX86_BUILTIN_PMINUB,
28007 IX86_BUILTIN_PMULHUW,
28008 IX86_BUILTIN_PMULHW,
28009 IX86_BUILTIN_PMULLW,
28011 IX86_BUILTIN_PSADBW,
28012 IX86_BUILTIN_PSHUFW,
28014 IX86_BUILTIN_PSLLW,
28015 IX86_BUILTIN_PSLLD,
28016 IX86_BUILTIN_PSLLQ,
28017 IX86_BUILTIN_PSRAW,
28018 IX86_BUILTIN_PSRAD,
28019 IX86_BUILTIN_PSRLW,
28020 IX86_BUILTIN_PSRLD,
28021 IX86_BUILTIN_PSRLQ,
28022 IX86_BUILTIN_PSLLWI,
28023 IX86_BUILTIN_PSLLDI,
28024 IX86_BUILTIN_PSLLQI,
28025 IX86_BUILTIN_PSRAWI,
28026 IX86_BUILTIN_PSRADI,
28027 IX86_BUILTIN_PSRLWI,
28028 IX86_BUILTIN_PSRLDI,
28029 IX86_BUILTIN_PSRLQI,
28031 IX86_BUILTIN_PUNPCKHBW,
28032 IX86_BUILTIN_PUNPCKHWD,
28033 IX86_BUILTIN_PUNPCKHDQ,
28034 IX86_BUILTIN_PUNPCKLBW,
28035 IX86_BUILTIN_PUNPCKLWD,
28036 IX86_BUILTIN_PUNPCKLDQ,
28038 IX86_BUILTIN_SHUFPS,
28040 IX86_BUILTIN_RCPPS,
28041 IX86_BUILTIN_RCPSS,
28042 IX86_BUILTIN_RSQRTPS,
28043 IX86_BUILTIN_RSQRTPS_NR,
28044 IX86_BUILTIN_RSQRTSS,
28045 IX86_BUILTIN_RSQRTF,
28046 IX86_BUILTIN_SQRTPS,
28047 IX86_BUILTIN_SQRTPS_NR,
28048 IX86_BUILTIN_SQRTSS,
28050 IX86_BUILTIN_UNPCKHPS,
28051 IX86_BUILTIN_UNPCKLPS,
28053 IX86_BUILTIN_ANDPS,
28054 IX86_BUILTIN_ANDNPS,
28055 IX86_BUILTIN_ORPS,
28056 IX86_BUILTIN_XORPS,
28058 IX86_BUILTIN_EMMS,
28059 IX86_BUILTIN_LDMXCSR,
28060 IX86_BUILTIN_STMXCSR,
28061 IX86_BUILTIN_SFENCE,
28063 IX86_BUILTIN_FXSAVE,
28064 IX86_BUILTIN_FXRSTOR,
28065 IX86_BUILTIN_FXSAVE64,
28066 IX86_BUILTIN_FXRSTOR64,
28068 IX86_BUILTIN_XSAVE,
28069 IX86_BUILTIN_XRSTOR,
28070 IX86_BUILTIN_XSAVE64,
28071 IX86_BUILTIN_XRSTOR64,
28073 IX86_BUILTIN_XSAVEOPT,
28074 IX86_BUILTIN_XSAVEOPT64,
28076 IX86_BUILTIN_XSAVEC,
28077 IX86_BUILTIN_XSAVEC64,
28079 IX86_BUILTIN_XSAVES,
28080 IX86_BUILTIN_XRSTORS,
28081 IX86_BUILTIN_XSAVES64,
28082 IX86_BUILTIN_XRSTORS64,
28084 /* 3DNow! Original */
28085 IX86_BUILTIN_FEMMS,
28086 IX86_BUILTIN_PAVGUSB,
28087 IX86_BUILTIN_PF2ID,
28088 IX86_BUILTIN_PFACC,
28089 IX86_BUILTIN_PFADD,
28090 IX86_BUILTIN_PFCMPEQ,
28091 IX86_BUILTIN_PFCMPGE,
28092 IX86_BUILTIN_PFCMPGT,
28093 IX86_BUILTIN_PFMAX,
28094 IX86_BUILTIN_PFMIN,
28095 IX86_BUILTIN_PFMUL,
28096 IX86_BUILTIN_PFRCP,
28097 IX86_BUILTIN_PFRCPIT1,
28098 IX86_BUILTIN_PFRCPIT2,
28099 IX86_BUILTIN_PFRSQIT1,
28100 IX86_BUILTIN_PFRSQRT,
28101 IX86_BUILTIN_PFSUB,
28102 IX86_BUILTIN_PFSUBR,
28103 IX86_BUILTIN_PI2FD,
28104 IX86_BUILTIN_PMULHRW,
28106 /* 3DNow! Athlon Extensions */
28107 IX86_BUILTIN_PF2IW,
28108 IX86_BUILTIN_PFNACC,
28109 IX86_BUILTIN_PFPNACC,
28110 IX86_BUILTIN_PI2FW,
28111 IX86_BUILTIN_PSWAPDSI,
28112 IX86_BUILTIN_PSWAPDSF,
28114 /* SSE2 */
28115 IX86_BUILTIN_ADDPD,
28116 IX86_BUILTIN_ADDSD,
28117 IX86_BUILTIN_DIVPD,
28118 IX86_BUILTIN_DIVSD,
28119 IX86_BUILTIN_MULPD,
28120 IX86_BUILTIN_MULSD,
28121 IX86_BUILTIN_SUBPD,
28122 IX86_BUILTIN_SUBSD,
28124 IX86_BUILTIN_CMPEQPD,
28125 IX86_BUILTIN_CMPLTPD,
28126 IX86_BUILTIN_CMPLEPD,
28127 IX86_BUILTIN_CMPGTPD,
28128 IX86_BUILTIN_CMPGEPD,
28129 IX86_BUILTIN_CMPNEQPD,
28130 IX86_BUILTIN_CMPNLTPD,
28131 IX86_BUILTIN_CMPNLEPD,
28132 IX86_BUILTIN_CMPNGTPD,
28133 IX86_BUILTIN_CMPNGEPD,
28134 IX86_BUILTIN_CMPORDPD,
28135 IX86_BUILTIN_CMPUNORDPD,
28136 IX86_BUILTIN_CMPEQSD,
28137 IX86_BUILTIN_CMPLTSD,
28138 IX86_BUILTIN_CMPLESD,
28139 IX86_BUILTIN_CMPNEQSD,
28140 IX86_BUILTIN_CMPNLTSD,
28141 IX86_BUILTIN_CMPNLESD,
28142 IX86_BUILTIN_CMPORDSD,
28143 IX86_BUILTIN_CMPUNORDSD,
28145 IX86_BUILTIN_COMIEQSD,
28146 IX86_BUILTIN_COMILTSD,
28147 IX86_BUILTIN_COMILESD,
28148 IX86_BUILTIN_COMIGTSD,
28149 IX86_BUILTIN_COMIGESD,
28150 IX86_BUILTIN_COMINEQSD,
28151 IX86_BUILTIN_UCOMIEQSD,
28152 IX86_BUILTIN_UCOMILTSD,
28153 IX86_BUILTIN_UCOMILESD,
28154 IX86_BUILTIN_UCOMIGTSD,
28155 IX86_BUILTIN_UCOMIGESD,
28156 IX86_BUILTIN_UCOMINEQSD,
28158 IX86_BUILTIN_MAXPD,
28159 IX86_BUILTIN_MAXSD,
28160 IX86_BUILTIN_MINPD,
28161 IX86_BUILTIN_MINSD,
28163 IX86_BUILTIN_ANDPD,
28164 IX86_BUILTIN_ANDNPD,
28165 IX86_BUILTIN_ORPD,
28166 IX86_BUILTIN_XORPD,
28168 IX86_BUILTIN_SQRTPD,
28169 IX86_BUILTIN_SQRTSD,
28171 IX86_BUILTIN_UNPCKHPD,
28172 IX86_BUILTIN_UNPCKLPD,
28174 IX86_BUILTIN_SHUFPD,
28176 IX86_BUILTIN_LOADUPD,
28177 IX86_BUILTIN_STOREUPD,
28178 IX86_BUILTIN_MOVSD,
28180 IX86_BUILTIN_LOADHPD,
28181 IX86_BUILTIN_LOADLPD,
28183 IX86_BUILTIN_CVTDQ2PD,
28184 IX86_BUILTIN_CVTDQ2PS,
28186 IX86_BUILTIN_CVTPD2DQ,
28187 IX86_BUILTIN_CVTPD2PI,
28188 IX86_BUILTIN_CVTPD2PS,
28189 IX86_BUILTIN_CVTTPD2DQ,
28190 IX86_BUILTIN_CVTTPD2PI,
28192 IX86_BUILTIN_CVTPI2PD,
28193 IX86_BUILTIN_CVTSI2SD,
28194 IX86_BUILTIN_CVTSI642SD,
28196 IX86_BUILTIN_CVTSD2SI,
28197 IX86_BUILTIN_CVTSD2SI64,
28198 IX86_BUILTIN_CVTSD2SS,
28199 IX86_BUILTIN_CVTSS2SD,
28200 IX86_BUILTIN_CVTTSD2SI,
28201 IX86_BUILTIN_CVTTSD2SI64,
28203 IX86_BUILTIN_CVTPS2DQ,
28204 IX86_BUILTIN_CVTPS2PD,
28205 IX86_BUILTIN_CVTTPS2DQ,
28207 IX86_BUILTIN_MOVNTI,
28208 IX86_BUILTIN_MOVNTI64,
28209 IX86_BUILTIN_MOVNTPD,
28210 IX86_BUILTIN_MOVNTDQ,
28212 IX86_BUILTIN_MOVQ128,
28214 /* SSE2 MMX */
28215 IX86_BUILTIN_MASKMOVDQU,
28216 IX86_BUILTIN_MOVMSKPD,
28217 IX86_BUILTIN_PMOVMSKB128,
28219 IX86_BUILTIN_PACKSSWB128,
28220 IX86_BUILTIN_PACKSSDW128,
28221 IX86_BUILTIN_PACKUSWB128,
28223 IX86_BUILTIN_PADDB128,
28224 IX86_BUILTIN_PADDW128,
28225 IX86_BUILTIN_PADDD128,
28226 IX86_BUILTIN_PADDQ128,
28227 IX86_BUILTIN_PADDSB128,
28228 IX86_BUILTIN_PADDSW128,
28229 IX86_BUILTIN_PADDUSB128,
28230 IX86_BUILTIN_PADDUSW128,
28231 IX86_BUILTIN_PSUBB128,
28232 IX86_BUILTIN_PSUBW128,
28233 IX86_BUILTIN_PSUBD128,
28234 IX86_BUILTIN_PSUBQ128,
28235 IX86_BUILTIN_PSUBSB128,
28236 IX86_BUILTIN_PSUBSW128,
28237 IX86_BUILTIN_PSUBUSB128,
28238 IX86_BUILTIN_PSUBUSW128,
28240 IX86_BUILTIN_PAND128,
28241 IX86_BUILTIN_PANDN128,
28242 IX86_BUILTIN_POR128,
28243 IX86_BUILTIN_PXOR128,
28245 IX86_BUILTIN_PAVGB128,
28246 IX86_BUILTIN_PAVGW128,
28248 IX86_BUILTIN_PCMPEQB128,
28249 IX86_BUILTIN_PCMPEQW128,
28250 IX86_BUILTIN_PCMPEQD128,
28251 IX86_BUILTIN_PCMPGTB128,
28252 IX86_BUILTIN_PCMPGTW128,
28253 IX86_BUILTIN_PCMPGTD128,
28255 IX86_BUILTIN_PMADDWD128,
28257 IX86_BUILTIN_PMAXSW128,
28258 IX86_BUILTIN_PMAXUB128,
28259 IX86_BUILTIN_PMINSW128,
28260 IX86_BUILTIN_PMINUB128,
28262 IX86_BUILTIN_PMULUDQ,
28263 IX86_BUILTIN_PMULUDQ128,
28264 IX86_BUILTIN_PMULHUW128,
28265 IX86_BUILTIN_PMULHW128,
28266 IX86_BUILTIN_PMULLW128,
28268 IX86_BUILTIN_PSADBW128,
28269 IX86_BUILTIN_PSHUFHW,
28270 IX86_BUILTIN_PSHUFLW,
28271 IX86_BUILTIN_PSHUFD,
28273 IX86_BUILTIN_PSLLDQI128,
28274 IX86_BUILTIN_PSLLWI128,
28275 IX86_BUILTIN_PSLLDI128,
28276 IX86_BUILTIN_PSLLQI128,
28277 IX86_BUILTIN_PSRAWI128,
28278 IX86_BUILTIN_PSRADI128,
28279 IX86_BUILTIN_PSRLDQI128,
28280 IX86_BUILTIN_PSRLWI128,
28281 IX86_BUILTIN_PSRLDI128,
28282 IX86_BUILTIN_PSRLQI128,
28284 IX86_BUILTIN_PSLLDQ128,
28285 IX86_BUILTIN_PSLLW128,
28286 IX86_BUILTIN_PSLLD128,
28287 IX86_BUILTIN_PSLLQ128,
28288 IX86_BUILTIN_PSRAW128,
28289 IX86_BUILTIN_PSRAD128,
28290 IX86_BUILTIN_PSRLW128,
28291 IX86_BUILTIN_PSRLD128,
28292 IX86_BUILTIN_PSRLQ128,
28294 IX86_BUILTIN_PUNPCKHBW128,
28295 IX86_BUILTIN_PUNPCKHWD128,
28296 IX86_BUILTIN_PUNPCKHDQ128,
28297 IX86_BUILTIN_PUNPCKHQDQ128,
28298 IX86_BUILTIN_PUNPCKLBW128,
28299 IX86_BUILTIN_PUNPCKLWD128,
28300 IX86_BUILTIN_PUNPCKLDQ128,
28301 IX86_BUILTIN_PUNPCKLQDQ128,
28303 IX86_BUILTIN_CLFLUSH,
28304 IX86_BUILTIN_MFENCE,
28305 IX86_BUILTIN_LFENCE,
28306 IX86_BUILTIN_PAUSE,
28308 IX86_BUILTIN_FNSTENV,
28309 IX86_BUILTIN_FLDENV,
28310 IX86_BUILTIN_FNSTSW,
28311 IX86_BUILTIN_FNCLEX,
28313 IX86_BUILTIN_BSRSI,
28314 IX86_BUILTIN_BSRDI,
28315 IX86_BUILTIN_RDPMC,
28316 IX86_BUILTIN_RDTSC,
28317 IX86_BUILTIN_RDTSCP,
28318 IX86_BUILTIN_ROLQI,
28319 IX86_BUILTIN_ROLHI,
28320 IX86_BUILTIN_RORQI,
28321 IX86_BUILTIN_RORHI,
28323 /* SSE3. */
28324 IX86_BUILTIN_ADDSUBPS,
28325 IX86_BUILTIN_HADDPS,
28326 IX86_BUILTIN_HSUBPS,
28327 IX86_BUILTIN_MOVSHDUP,
28328 IX86_BUILTIN_MOVSLDUP,
28329 IX86_BUILTIN_ADDSUBPD,
28330 IX86_BUILTIN_HADDPD,
28331 IX86_BUILTIN_HSUBPD,
28332 IX86_BUILTIN_LDDQU,
28334 IX86_BUILTIN_MONITOR,
28335 IX86_BUILTIN_MWAIT,
28337 /* SSSE3. */
28338 IX86_BUILTIN_PHADDW,
28339 IX86_BUILTIN_PHADDD,
28340 IX86_BUILTIN_PHADDSW,
28341 IX86_BUILTIN_PHSUBW,
28342 IX86_BUILTIN_PHSUBD,
28343 IX86_BUILTIN_PHSUBSW,
28344 IX86_BUILTIN_PMADDUBSW,
28345 IX86_BUILTIN_PMULHRSW,
28346 IX86_BUILTIN_PSHUFB,
28347 IX86_BUILTIN_PSIGNB,
28348 IX86_BUILTIN_PSIGNW,
28349 IX86_BUILTIN_PSIGND,
28350 IX86_BUILTIN_PALIGNR,
28351 IX86_BUILTIN_PABSB,
28352 IX86_BUILTIN_PABSW,
28353 IX86_BUILTIN_PABSD,
28355 IX86_BUILTIN_PHADDW128,
28356 IX86_BUILTIN_PHADDD128,
28357 IX86_BUILTIN_PHADDSW128,
28358 IX86_BUILTIN_PHSUBW128,
28359 IX86_BUILTIN_PHSUBD128,
28360 IX86_BUILTIN_PHSUBSW128,
28361 IX86_BUILTIN_PMADDUBSW128,
28362 IX86_BUILTIN_PMULHRSW128,
28363 IX86_BUILTIN_PSHUFB128,
28364 IX86_BUILTIN_PSIGNB128,
28365 IX86_BUILTIN_PSIGNW128,
28366 IX86_BUILTIN_PSIGND128,
28367 IX86_BUILTIN_PALIGNR128,
28368 IX86_BUILTIN_PABSB128,
28369 IX86_BUILTIN_PABSW128,
28370 IX86_BUILTIN_PABSD128,
28372 /* AMDFAM10 - SSE4A New Instructions. */
28373 IX86_BUILTIN_MOVNTSD,
28374 IX86_BUILTIN_MOVNTSS,
28375 IX86_BUILTIN_EXTRQI,
28376 IX86_BUILTIN_EXTRQ,
28377 IX86_BUILTIN_INSERTQI,
28378 IX86_BUILTIN_INSERTQ,
28380 /* SSE4.1. */
28381 IX86_BUILTIN_BLENDPD,
28382 IX86_BUILTIN_BLENDPS,
28383 IX86_BUILTIN_BLENDVPD,
28384 IX86_BUILTIN_BLENDVPS,
28385 IX86_BUILTIN_PBLENDVB128,
28386 IX86_BUILTIN_PBLENDW128,
28388 IX86_BUILTIN_DPPD,
28389 IX86_BUILTIN_DPPS,
28391 IX86_BUILTIN_INSERTPS128,
28393 IX86_BUILTIN_MOVNTDQA,
28394 IX86_BUILTIN_MPSADBW128,
28395 IX86_BUILTIN_PACKUSDW128,
28396 IX86_BUILTIN_PCMPEQQ,
28397 IX86_BUILTIN_PHMINPOSUW128,
28399 IX86_BUILTIN_PMAXSB128,
28400 IX86_BUILTIN_PMAXSD128,
28401 IX86_BUILTIN_PMAXUD128,
28402 IX86_BUILTIN_PMAXUW128,
28404 IX86_BUILTIN_PMINSB128,
28405 IX86_BUILTIN_PMINSD128,
28406 IX86_BUILTIN_PMINUD128,
28407 IX86_BUILTIN_PMINUW128,
28409 IX86_BUILTIN_PMOVSXBW128,
28410 IX86_BUILTIN_PMOVSXBD128,
28411 IX86_BUILTIN_PMOVSXBQ128,
28412 IX86_BUILTIN_PMOVSXWD128,
28413 IX86_BUILTIN_PMOVSXWQ128,
28414 IX86_BUILTIN_PMOVSXDQ128,
28416 IX86_BUILTIN_PMOVZXBW128,
28417 IX86_BUILTIN_PMOVZXBD128,
28418 IX86_BUILTIN_PMOVZXBQ128,
28419 IX86_BUILTIN_PMOVZXWD128,
28420 IX86_BUILTIN_PMOVZXWQ128,
28421 IX86_BUILTIN_PMOVZXDQ128,
28423 IX86_BUILTIN_PMULDQ128,
28424 IX86_BUILTIN_PMULLD128,
28426 IX86_BUILTIN_ROUNDSD,
28427 IX86_BUILTIN_ROUNDSS,
28429 IX86_BUILTIN_ROUNDPD,
28430 IX86_BUILTIN_ROUNDPS,
28432 IX86_BUILTIN_FLOORPD,
28433 IX86_BUILTIN_CEILPD,
28434 IX86_BUILTIN_TRUNCPD,
28435 IX86_BUILTIN_RINTPD,
28436 IX86_BUILTIN_ROUNDPD_AZ,
28438 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28439 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28440 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28442 IX86_BUILTIN_FLOORPS,
28443 IX86_BUILTIN_CEILPS,
28444 IX86_BUILTIN_TRUNCPS,
28445 IX86_BUILTIN_RINTPS,
28446 IX86_BUILTIN_ROUNDPS_AZ,
28448 IX86_BUILTIN_FLOORPS_SFIX,
28449 IX86_BUILTIN_CEILPS_SFIX,
28450 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28452 IX86_BUILTIN_PTESTZ,
28453 IX86_BUILTIN_PTESTC,
28454 IX86_BUILTIN_PTESTNZC,
28456 IX86_BUILTIN_VEC_INIT_V2SI,
28457 IX86_BUILTIN_VEC_INIT_V4HI,
28458 IX86_BUILTIN_VEC_INIT_V8QI,
28459 IX86_BUILTIN_VEC_EXT_V2DF,
28460 IX86_BUILTIN_VEC_EXT_V2DI,
28461 IX86_BUILTIN_VEC_EXT_V4SF,
28462 IX86_BUILTIN_VEC_EXT_V4SI,
28463 IX86_BUILTIN_VEC_EXT_V8HI,
28464 IX86_BUILTIN_VEC_EXT_V2SI,
28465 IX86_BUILTIN_VEC_EXT_V4HI,
28466 IX86_BUILTIN_VEC_EXT_V16QI,
28467 IX86_BUILTIN_VEC_SET_V2DI,
28468 IX86_BUILTIN_VEC_SET_V4SF,
28469 IX86_BUILTIN_VEC_SET_V4SI,
28470 IX86_BUILTIN_VEC_SET_V8HI,
28471 IX86_BUILTIN_VEC_SET_V4HI,
28472 IX86_BUILTIN_VEC_SET_V16QI,
28474 IX86_BUILTIN_VEC_PACK_SFIX,
28475 IX86_BUILTIN_VEC_PACK_SFIX256,
28477 /* SSE4.2. */
28478 IX86_BUILTIN_CRC32QI,
28479 IX86_BUILTIN_CRC32HI,
28480 IX86_BUILTIN_CRC32SI,
28481 IX86_BUILTIN_CRC32DI,
28483 IX86_BUILTIN_PCMPESTRI128,
28484 IX86_BUILTIN_PCMPESTRM128,
28485 IX86_BUILTIN_PCMPESTRA128,
28486 IX86_BUILTIN_PCMPESTRC128,
28487 IX86_BUILTIN_PCMPESTRO128,
28488 IX86_BUILTIN_PCMPESTRS128,
28489 IX86_BUILTIN_PCMPESTRZ128,
28490 IX86_BUILTIN_PCMPISTRI128,
28491 IX86_BUILTIN_PCMPISTRM128,
28492 IX86_BUILTIN_PCMPISTRA128,
28493 IX86_BUILTIN_PCMPISTRC128,
28494 IX86_BUILTIN_PCMPISTRO128,
28495 IX86_BUILTIN_PCMPISTRS128,
28496 IX86_BUILTIN_PCMPISTRZ128,
28498 IX86_BUILTIN_PCMPGTQ,
28500 /* AES instructions.  */
28501 IX86_BUILTIN_AESENC128,
28502 IX86_BUILTIN_AESENCLAST128,
28503 IX86_BUILTIN_AESDEC128,
28504 IX86_BUILTIN_AESDECLAST128,
28505 IX86_BUILTIN_AESIMC128,
28506 IX86_BUILTIN_AESKEYGENASSIST128,
28508 /* PCLMUL instruction.  */
28509 IX86_BUILTIN_PCLMULQDQ128,
28511 /* AVX.  */
28512 IX86_BUILTIN_ADDPD256,
28513 IX86_BUILTIN_ADDPS256,
28514 IX86_BUILTIN_ADDSUBPD256,
28515 IX86_BUILTIN_ADDSUBPS256,
28516 IX86_BUILTIN_ANDPD256,
28517 IX86_BUILTIN_ANDPS256,
28518 IX86_BUILTIN_ANDNPD256,
28519 IX86_BUILTIN_ANDNPS256,
28520 IX86_BUILTIN_BLENDPD256,
28521 IX86_BUILTIN_BLENDPS256,
28522 IX86_BUILTIN_BLENDVPD256,
28523 IX86_BUILTIN_BLENDVPS256,
28524 IX86_BUILTIN_DIVPD256,
28525 IX86_BUILTIN_DIVPS256,
28526 IX86_BUILTIN_DPPS256,
28527 IX86_BUILTIN_HADDPD256,
28528 IX86_BUILTIN_HADDPS256,
28529 IX86_BUILTIN_HSUBPD256,
28530 IX86_BUILTIN_HSUBPS256,
28531 IX86_BUILTIN_MAXPD256,
28532 IX86_BUILTIN_MAXPS256,
28533 IX86_BUILTIN_MINPD256,
28534 IX86_BUILTIN_MINPS256,
28535 IX86_BUILTIN_MULPD256,
28536 IX86_BUILTIN_MULPS256,
28537 IX86_BUILTIN_ORPD256,
28538 IX86_BUILTIN_ORPS256,
28539 IX86_BUILTIN_SHUFPD256,
28540 IX86_BUILTIN_SHUFPS256,
28541 IX86_BUILTIN_SUBPD256,
28542 IX86_BUILTIN_SUBPS256,
28543 IX86_BUILTIN_XORPD256,
28544 IX86_BUILTIN_XORPS256,
28545 IX86_BUILTIN_CMPSD,
28546 IX86_BUILTIN_CMPSS,
28547 IX86_BUILTIN_CMPPD,
28548 IX86_BUILTIN_CMPPS,
28549 IX86_BUILTIN_CMPPD256,
28550 IX86_BUILTIN_CMPPS256,
28551 IX86_BUILTIN_CVTDQ2PD256,
28552 IX86_BUILTIN_CVTDQ2PS256,
28553 IX86_BUILTIN_CVTPD2PS256,
28554 IX86_BUILTIN_CVTPS2DQ256,
28555 IX86_BUILTIN_CVTPS2PD256,
28556 IX86_BUILTIN_CVTTPD2DQ256,
28557 IX86_BUILTIN_CVTPD2DQ256,
28558 IX86_BUILTIN_CVTTPS2DQ256,
28559 IX86_BUILTIN_EXTRACTF128PD256,
28560 IX86_BUILTIN_EXTRACTF128PS256,
28561 IX86_BUILTIN_EXTRACTF128SI256,
28562 IX86_BUILTIN_VZEROALL,
28563 IX86_BUILTIN_VZEROUPPER,
28564 IX86_BUILTIN_VPERMILVARPD,
28565 IX86_BUILTIN_VPERMILVARPS,
28566 IX86_BUILTIN_VPERMILVARPD256,
28567 IX86_BUILTIN_VPERMILVARPS256,
28568 IX86_BUILTIN_VPERMILPD,
28569 IX86_BUILTIN_VPERMILPS,
28570 IX86_BUILTIN_VPERMILPD256,
28571 IX86_BUILTIN_VPERMILPS256,
28572 IX86_BUILTIN_VPERMIL2PD,
28573 IX86_BUILTIN_VPERMIL2PS,
28574 IX86_BUILTIN_VPERMIL2PD256,
28575 IX86_BUILTIN_VPERMIL2PS256,
28576 IX86_BUILTIN_VPERM2F128PD256,
28577 IX86_BUILTIN_VPERM2F128PS256,
28578 IX86_BUILTIN_VPERM2F128SI256,
28579 IX86_BUILTIN_VBROADCASTSS,
28580 IX86_BUILTIN_VBROADCASTSD256,
28581 IX86_BUILTIN_VBROADCASTSS256,
28582 IX86_BUILTIN_VBROADCASTPD256,
28583 IX86_BUILTIN_VBROADCASTPS256,
28584 IX86_BUILTIN_VINSERTF128PD256,
28585 IX86_BUILTIN_VINSERTF128PS256,
28586 IX86_BUILTIN_VINSERTF128SI256,
28587 IX86_BUILTIN_LOADUPD256,
28588 IX86_BUILTIN_LOADUPS256,
28589 IX86_BUILTIN_STOREUPD256,
28590 IX86_BUILTIN_STOREUPS256,
28591 IX86_BUILTIN_LDDQU256,
28592 IX86_BUILTIN_MOVNTDQ256,
28593 IX86_BUILTIN_MOVNTPD256,
28594 IX86_BUILTIN_MOVNTPS256,
28595 IX86_BUILTIN_LOADDQU256,
28596 IX86_BUILTIN_STOREDQU256,
28597 IX86_BUILTIN_MASKLOADPD,
28598 IX86_BUILTIN_MASKLOADPS,
28599 IX86_BUILTIN_MASKSTOREPD,
28600 IX86_BUILTIN_MASKSTOREPS,
28601 IX86_BUILTIN_MASKLOADPD256,
28602 IX86_BUILTIN_MASKLOADPS256,
28603 IX86_BUILTIN_MASKSTOREPD256,
28604 IX86_BUILTIN_MASKSTOREPS256,
28605 IX86_BUILTIN_MOVSHDUP256,
28606 IX86_BUILTIN_MOVSLDUP256,
28607 IX86_BUILTIN_MOVDDUP256,
28609 IX86_BUILTIN_SQRTPD256,
28610 IX86_BUILTIN_SQRTPS256,
28611 IX86_BUILTIN_SQRTPS_NR256,
28612 IX86_BUILTIN_RSQRTPS256,
28613 IX86_BUILTIN_RSQRTPS_NR256,
28615 IX86_BUILTIN_RCPPS256,
28617 IX86_BUILTIN_ROUNDPD256,
28618 IX86_BUILTIN_ROUNDPS256,
28620 IX86_BUILTIN_FLOORPD256,
28621 IX86_BUILTIN_CEILPD256,
28622 IX86_BUILTIN_TRUNCPD256,
28623 IX86_BUILTIN_RINTPD256,
28624 IX86_BUILTIN_ROUNDPD_AZ256,
28626 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28627 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28628 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28630 IX86_BUILTIN_FLOORPS256,
28631 IX86_BUILTIN_CEILPS256,
28632 IX86_BUILTIN_TRUNCPS256,
28633 IX86_BUILTIN_RINTPS256,
28634 IX86_BUILTIN_ROUNDPS_AZ256,
28636 IX86_BUILTIN_FLOORPS_SFIX256,
28637 IX86_BUILTIN_CEILPS_SFIX256,
28638 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28640 IX86_BUILTIN_UNPCKHPD256,
28641 IX86_BUILTIN_UNPCKLPD256,
28642 IX86_BUILTIN_UNPCKHPS256,
28643 IX86_BUILTIN_UNPCKLPS256,
28645 IX86_BUILTIN_SI256_SI,
28646 IX86_BUILTIN_PS256_PS,
28647 IX86_BUILTIN_PD256_PD,
28648 IX86_BUILTIN_SI_SI256,
28649 IX86_BUILTIN_PS_PS256,
28650 IX86_BUILTIN_PD_PD256,
28652 IX86_BUILTIN_VTESTZPD,
28653 IX86_BUILTIN_VTESTCPD,
28654 IX86_BUILTIN_VTESTNZCPD,
28655 IX86_BUILTIN_VTESTZPS,
28656 IX86_BUILTIN_VTESTCPS,
28657 IX86_BUILTIN_VTESTNZCPS,
28658 IX86_BUILTIN_VTESTZPD256,
28659 IX86_BUILTIN_VTESTCPD256,
28660 IX86_BUILTIN_VTESTNZCPD256,
28661 IX86_BUILTIN_VTESTZPS256,
28662 IX86_BUILTIN_VTESTCPS256,
28663 IX86_BUILTIN_VTESTNZCPS256,
28664 IX86_BUILTIN_PTESTZ256,
28665 IX86_BUILTIN_PTESTC256,
28666 IX86_BUILTIN_PTESTNZC256,
28668 IX86_BUILTIN_MOVMSKPD256,
28669 IX86_BUILTIN_MOVMSKPS256,
28671 /* AVX2.  */
28672 IX86_BUILTIN_MPSADBW256,
28673 IX86_BUILTIN_PABSB256,
28674 IX86_BUILTIN_PABSW256,
28675 IX86_BUILTIN_PABSD256,
28676 IX86_BUILTIN_PACKSSDW256,
28677 IX86_BUILTIN_PACKSSWB256,
28678 IX86_BUILTIN_PACKUSDW256,
28679 IX86_BUILTIN_PACKUSWB256,
28680 IX86_BUILTIN_PADDB256,
28681 IX86_BUILTIN_PADDW256,
28682 IX86_BUILTIN_PADDD256,
28683 IX86_BUILTIN_PADDQ256,
28684 IX86_BUILTIN_PADDSB256,
28685 IX86_BUILTIN_PADDSW256,
28686 IX86_BUILTIN_PADDUSB256,
28687 IX86_BUILTIN_PADDUSW256,
28688 IX86_BUILTIN_PALIGNR256,
28689 IX86_BUILTIN_AND256I,
28690 IX86_BUILTIN_ANDNOT256I,
28691 IX86_BUILTIN_PAVGB256,
28692 IX86_BUILTIN_PAVGW256,
28693 IX86_BUILTIN_PBLENDVB256,
28694 IX86_BUILTIN_PBLENDVW256,
28695 IX86_BUILTIN_PCMPEQB256,
28696 IX86_BUILTIN_PCMPEQW256,
28697 IX86_BUILTIN_PCMPEQD256,
28698 IX86_BUILTIN_PCMPEQQ256,
28699 IX86_BUILTIN_PCMPGTB256,
28700 IX86_BUILTIN_PCMPGTW256,
28701 IX86_BUILTIN_PCMPGTD256,
28702 IX86_BUILTIN_PCMPGTQ256,
28703 IX86_BUILTIN_PHADDW256,
28704 IX86_BUILTIN_PHADDD256,
28705 IX86_BUILTIN_PHADDSW256,
28706 IX86_BUILTIN_PHSUBW256,
28707 IX86_BUILTIN_PHSUBD256,
28708 IX86_BUILTIN_PHSUBSW256,
28709 IX86_BUILTIN_PMADDUBSW256,
28710 IX86_BUILTIN_PMADDWD256,
28711 IX86_BUILTIN_PMAXSB256,
28712 IX86_BUILTIN_PMAXSW256,
28713 IX86_BUILTIN_PMAXSD256,
28714 IX86_BUILTIN_PMAXUB256,
28715 IX86_BUILTIN_PMAXUW256,
28716 IX86_BUILTIN_PMAXUD256,
28717 IX86_BUILTIN_PMINSB256,
28718 IX86_BUILTIN_PMINSW256,
28719 IX86_BUILTIN_PMINSD256,
28720 IX86_BUILTIN_PMINUB256,
28721 IX86_BUILTIN_PMINUW256,
28722 IX86_BUILTIN_PMINUD256,
28723 IX86_BUILTIN_PMOVMSKB256,
28724 IX86_BUILTIN_PMOVSXBW256,
28725 IX86_BUILTIN_PMOVSXBD256,
28726 IX86_BUILTIN_PMOVSXBQ256,
28727 IX86_BUILTIN_PMOVSXWD256,
28728 IX86_BUILTIN_PMOVSXWQ256,
28729 IX86_BUILTIN_PMOVSXDQ256,
28730 IX86_BUILTIN_PMOVZXBW256,
28731 IX86_BUILTIN_PMOVZXBD256,
28732 IX86_BUILTIN_PMOVZXBQ256,
28733 IX86_BUILTIN_PMOVZXWD256,
28734 IX86_BUILTIN_PMOVZXWQ256,
28735 IX86_BUILTIN_PMOVZXDQ256,
28736 IX86_BUILTIN_PMULDQ256,
28737 IX86_BUILTIN_PMULHRSW256,
28738 IX86_BUILTIN_PMULHUW256,
28739 IX86_BUILTIN_PMULHW256,
28740 IX86_BUILTIN_PMULLW256,
28741 IX86_BUILTIN_PMULLD256,
28742 IX86_BUILTIN_PMULUDQ256,
28743 IX86_BUILTIN_POR256,
28744 IX86_BUILTIN_PSADBW256,
28745 IX86_BUILTIN_PSHUFB256,
28746 IX86_BUILTIN_PSHUFD256,
28747 IX86_BUILTIN_PSHUFHW256,
28748 IX86_BUILTIN_PSHUFLW256,
28749 IX86_BUILTIN_PSIGNB256,
28750 IX86_BUILTIN_PSIGNW256,
28751 IX86_BUILTIN_PSIGND256,
28752 IX86_BUILTIN_PSLLDQI256,
28753 IX86_BUILTIN_PSLLWI256,
28754 IX86_BUILTIN_PSLLW256,
28755 IX86_BUILTIN_PSLLDI256,
28756 IX86_BUILTIN_PSLLD256,
28757 IX86_BUILTIN_PSLLQI256,
28758 IX86_BUILTIN_PSLLQ256,
28759 IX86_BUILTIN_PSRAWI256,
28760 IX86_BUILTIN_PSRAW256,
28761 IX86_BUILTIN_PSRADI256,
28762 IX86_BUILTIN_PSRAD256,
28763 IX86_BUILTIN_PSRLDQI256,
28764 IX86_BUILTIN_PSRLWI256,
28765 IX86_BUILTIN_PSRLW256,
28766 IX86_BUILTIN_PSRLDI256,
28767 IX86_BUILTIN_PSRLD256,
28768 IX86_BUILTIN_PSRLQI256,
28769 IX86_BUILTIN_PSRLQ256,
28770 IX86_BUILTIN_PSUBB256,
28771 IX86_BUILTIN_PSUBW256,
28772 IX86_BUILTIN_PSUBD256,
28773 IX86_BUILTIN_PSUBQ256,
28774 IX86_BUILTIN_PSUBSB256,
28775 IX86_BUILTIN_PSUBSW256,
28776 IX86_BUILTIN_PSUBUSB256,
28777 IX86_BUILTIN_PSUBUSW256,
28778 IX86_BUILTIN_PUNPCKHBW256,
28779 IX86_BUILTIN_PUNPCKHWD256,
28780 IX86_BUILTIN_PUNPCKHDQ256,
28781 IX86_BUILTIN_PUNPCKHQDQ256,
28782 IX86_BUILTIN_PUNPCKLBW256,
28783 IX86_BUILTIN_PUNPCKLWD256,
28784 IX86_BUILTIN_PUNPCKLDQ256,
28785 IX86_BUILTIN_PUNPCKLQDQ256,
28786 IX86_BUILTIN_PXOR256,
28787 IX86_BUILTIN_MOVNTDQA256,
28788 IX86_BUILTIN_VBROADCASTSS_PS,
28789 IX86_BUILTIN_VBROADCASTSS_PS256,
28790 IX86_BUILTIN_VBROADCASTSD_PD256,
28791 IX86_BUILTIN_VBROADCASTSI256,
28792 IX86_BUILTIN_PBLENDD256,
28793 IX86_BUILTIN_PBLENDD128,
28794 IX86_BUILTIN_PBROADCASTB256,
28795 IX86_BUILTIN_PBROADCASTW256,
28796 IX86_BUILTIN_PBROADCASTD256,
28797 IX86_BUILTIN_PBROADCASTQ256,
28798 IX86_BUILTIN_PBROADCASTB128,
28799 IX86_BUILTIN_PBROADCASTW128,
28800 IX86_BUILTIN_PBROADCASTD128,
28801 IX86_BUILTIN_PBROADCASTQ128,
28802 IX86_BUILTIN_VPERMVARSI256,
28803 IX86_BUILTIN_VPERMDF256,
28804 IX86_BUILTIN_VPERMVARSF256,
28805 IX86_BUILTIN_VPERMDI256,
28806 IX86_BUILTIN_VPERMTI256,
28807 IX86_BUILTIN_VEXTRACT128I256,
28808 IX86_BUILTIN_VINSERT128I256,
28809 IX86_BUILTIN_MASKLOADD,
28810 IX86_BUILTIN_MASKLOADQ,
28811 IX86_BUILTIN_MASKLOADD256,
28812 IX86_BUILTIN_MASKLOADQ256,
28813 IX86_BUILTIN_MASKSTORED,
28814 IX86_BUILTIN_MASKSTOREQ,
28815 IX86_BUILTIN_MASKSTORED256,
28816 IX86_BUILTIN_MASKSTOREQ256,
28817 IX86_BUILTIN_PSLLVV4DI,
28818 IX86_BUILTIN_PSLLVV2DI,
28819 IX86_BUILTIN_PSLLVV8SI,
28820 IX86_BUILTIN_PSLLVV4SI,
28821 IX86_BUILTIN_PSRAVV8SI,
28822 IX86_BUILTIN_PSRAVV4SI,
28823 IX86_BUILTIN_PSRLVV4DI,
28824 IX86_BUILTIN_PSRLVV2DI,
28825 IX86_BUILTIN_PSRLVV8SI,
28826 IX86_BUILTIN_PSRLVV4SI,
28828 IX86_BUILTIN_GATHERSIV2DF,
28829 IX86_BUILTIN_GATHERSIV4DF,
28830 IX86_BUILTIN_GATHERDIV2DF,
28831 IX86_BUILTIN_GATHERDIV4DF,
28832 IX86_BUILTIN_GATHERSIV4SF,
28833 IX86_BUILTIN_GATHERSIV8SF,
28834 IX86_BUILTIN_GATHERDIV4SF,
28835 IX86_BUILTIN_GATHERDIV8SF,
28836 IX86_BUILTIN_GATHERSIV2DI,
28837 IX86_BUILTIN_GATHERSIV4DI,
28838 IX86_BUILTIN_GATHERDIV2DI,
28839 IX86_BUILTIN_GATHERDIV4DI,
28840 IX86_BUILTIN_GATHERSIV4SI,
28841 IX86_BUILTIN_GATHERSIV8SI,
28842 IX86_BUILTIN_GATHERDIV4SI,
28843 IX86_BUILTIN_GATHERDIV8SI,
28845 /* AVX512F.  */
28846 IX86_BUILTIN_SI512_SI256,
28847 IX86_BUILTIN_PD512_PD256,
28848 IX86_BUILTIN_PS512_PS256,
28849 IX86_BUILTIN_SI512_SI,
28850 IX86_BUILTIN_PD512_PD,
28851 IX86_BUILTIN_PS512_PS,
28852 IX86_BUILTIN_ADDPD512,
28853 IX86_BUILTIN_ADDPS512,
28854 IX86_BUILTIN_ADDSD_ROUND,
28855 IX86_BUILTIN_ADDSS_ROUND,
28856 IX86_BUILTIN_ALIGND512,
28857 IX86_BUILTIN_ALIGNQ512,
28858 IX86_BUILTIN_BLENDMD512,
28859 IX86_BUILTIN_BLENDMPD512,
28860 IX86_BUILTIN_BLENDMPS512,
28861 IX86_BUILTIN_BLENDMQ512,
28862 IX86_BUILTIN_BROADCASTF32X4_512,
28863 IX86_BUILTIN_BROADCASTF64X4_512,
28864 IX86_BUILTIN_BROADCASTI32X4_512,
28865 IX86_BUILTIN_BROADCASTI64X4_512,
28866 IX86_BUILTIN_BROADCASTSD512,
28867 IX86_BUILTIN_BROADCASTSS512,
28868 IX86_BUILTIN_CMPD512,
28869 IX86_BUILTIN_CMPPD512,
28870 IX86_BUILTIN_CMPPS512,
28871 IX86_BUILTIN_CMPQ512,
28872 IX86_BUILTIN_CMPSD_MASK,
28873 IX86_BUILTIN_CMPSS_MASK,
28874 IX86_BUILTIN_COMIDF,
28875 IX86_BUILTIN_COMISF,
28876 IX86_BUILTIN_COMPRESSPD512,
28877 IX86_BUILTIN_COMPRESSPDSTORE512,
28878 IX86_BUILTIN_COMPRESSPS512,
28879 IX86_BUILTIN_COMPRESSPSSTORE512,
28880 IX86_BUILTIN_CVTDQ2PD512,
28881 IX86_BUILTIN_CVTDQ2PS512,
28882 IX86_BUILTIN_CVTPD2DQ512,
28883 IX86_BUILTIN_CVTPD2PS512,
28884 IX86_BUILTIN_CVTPD2UDQ512,
28885 IX86_BUILTIN_CVTPH2PS512,
28886 IX86_BUILTIN_CVTPS2DQ512,
28887 IX86_BUILTIN_CVTPS2PD512,
28888 IX86_BUILTIN_CVTPS2PH512,
28889 IX86_BUILTIN_CVTPS2UDQ512,
28890 IX86_BUILTIN_CVTSD2SS_ROUND,
28891 IX86_BUILTIN_CVTSI2SD64,
28892 IX86_BUILTIN_CVTSI2SS32,
28893 IX86_BUILTIN_CVTSI2SS64,
28894 IX86_BUILTIN_CVTSS2SD_ROUND,
28895 IX86_BUILTIN_CVTTPD2DQ512,
28896 IX86_BUILTIN_CVTTPD2UDQ512,
28897 IX86_BUILTIN_CVTTPS2DQ512,
28898 IX86_BUILTIN_CVTTPS2UDQ512,
28899 IX86_BUILTIN_CVTUDQ2PD512,
28900 IX86_BUILTIN_CVTUDQ2PS512,
28901 IX86_BUILTIN_CVTUSI2SD32,
28902 IX86_BUILTIN_CVTUSI2SD64,
28903 IX86_BUILTIN_CVTUSI2SS32,
28904 IX86_BUILTIN_CVTUSI2SS64,
28905 IX86_BUILTIN_DIVPD512,
28906 IX86_BUILTIN_DIVPS512,
28907 IX86_BUILTIN_DIVSD_ROUND,
28908 IX86_BUILTIN_DIVSS_ROUND,
28909 IX86_BUILTIN_EXPANDPD512,
28910 IX86_BUILTIN_EXPANDPD512Z,
28911 IX86_BUILTIN_EXPANDPDLOAD512,
28912 IX86_BUILTIN_EXPANDPDLOAD512Z,
28913 IX86_BUILTIN_EXPANDPS512,
28914 IX86_BUILTIN_EXPANDPS512Z,
28915 IX86_BUILTIN_EXPANDPSLOAD512,
28916 IX86_BUILTIN_EXPANDPSLOAD512Z,
28917 IX86_BUILTIN_EXTRACTF32X4,
28918 IX86_BUILTIN_EXTRACTF64X4,
28919 IX86_BUILTIN_EXTRACTI32X4,
28920 IX86_BUILTIN_EXTRACTI64X4,
28921 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28922 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28923 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28924 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28925 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28926 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28927 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28928 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28929 IX86_BUILTIN_GETEXPPD512,
28930 IX86_BUILTIN_GETEXPPS512,
28931 IX86_BUILTIN_GETEXPSD128,
28932 IX86_BUILTIN_GETEXPSS128,
28933 IX86_BUILTIN_GETMANTPD512,
28934 IX86_BUILTIN_GETMANTPS512,
28935 IX86_BUILTIN_GETMANTSD128,
28936 IX86_BUILTIN_GETMANTSS128,
28937 IX86_BUILTIN_INSERTF32X4,
28938 IX86_BUILTIN_INSERTF64X4,
28939 IX86_BUILTIN_INSERTI32X4,
28940 IX86_BUILTIN_INSERTI64X4,
28941 IX86_BUILTIN_LOADAPD512,
28942 IX86_BUILTIN_LOADAPS512,
28943 IX86_BUILTIN_LOADDQUDI512,
28944 IX86_BUILTIN_LOADDQUSI512,
28945 IX86_BUILTIN_LOADUPD512,
28946 IX86_BUILTIN_LOADUPS512,
28947 IX86_BUILTIN_MAXPD512,
28948 IX86_BUILTIN_MAXPS512,
28949 IX86_BUILTIN_MAXSD_ROUND,
28950 IX86_BUILTIN_MAXSS_ROUND,
28951 IX86_BUILTIN_MINPD512,
28952 IX86_BUILTIN_MINPS512,
28953 IX86_BUILTIN_MINSD_ROUND,
28954 IX86_BUILTIN_MINSS_ROUND,
28955 IX86_BUILTIN_MOVAPD512,
28956 IX86_BUILTIN_MOVAPS512,
28957 IX86_BUILTIN_MOVDDUP512,
28958 IX86_BUILTIN_MOVDQA32LOAD512,
28959 IX86_BUILTIN_MOVDQA32STORE512,
28960 IX86_BUILTIN_MOVDQA32_512,
28961 IX86_BUILTIN_MOVDQA64LOAD512,
28962 IX86_BUILTIN_MOVDQA64STORE512,
28963 IX86_BUILTIN_MOVDQA64_512,
28964 IX86_BUILTIN_MOVNTDQ512,
28965 IX86_BUILTIN_MOVNTDQA512,
28966 IX86_BUILTIN_MOVNTPD512,
28967 IX86_BUILTIN_MOVNTPS512,
28968 IX86_BUILTIN_MOVSHDUP512,
28969 IX86_BUILTIN_MOVSLDUP512,
28970 IX86_BUILTIN_MULPD512,
28971 IX86_BUILTIN_MULPS512,
28972 IX86_BUILTIN_MULSD_ROUND,
28973 IX86_BUILTIN_MULSS_ROUND,
28974 IX86_BUILTIN_PABSD512,
28975 IX86_BUILTIN_PABSQ512,
28976 IX86_BUILTIN_PADDD512,
28977 IX86_BUILTIN_PADDQ512,
28978 IX86_BUILTIN_PANDD512,
28979 IX86_BUILTIN_PANDND512,
28980 IX86_BUILTIN_PANDNQ512,
28981 IX86_BUILTIN_PANDQ512,
28982 IX86_BUILTIN_PBROADCASTD512,
28983 IX86_BUILTIN_PBROADCASTD512_GPR,
28984 IX86_BUILTIN_PBROADCASTMB512,
28985 IX86_BUILTIN_PBROADCASTMW512,
28986 IX86_BUILTIN_PBROADCASTQ512,
28987 IX86_BUILTIN_PBROADCASTQ512_GPR,
28988 IX86_BUILTIN_PCMPEQD512_MASK,
28989 IX86_BUILTIN_PCMPEQQ512_MASK,
28990 IX86_BUILTIN_PCMPGTD512_MASK,
28991 IX86_BUILTIN_PCMPGTQ512_MASK,
28992 IX86_BUILTIN_PCOMPRESSD512,
28993 IX86_BUILTIN_PCOMPRESSDSTORE512,
28994 IX86_BUILTIN_PCOMPRESSQ512,
28995 IX86_BUILTIN_PCOMPRESSQSTORE512,
28996 IX86_BUILTIN_PEXPANDD512,
28997 IX86_BUILTIN_PEXPANDD512Z,
28998 IX86_BUILTIN_PEXPANDDLOAD512,
28999 IX86_BUILTIN_PEXPANDDLOAD512Z,
29000 IX86_BUILTIN_PEXPANDQ512,
29001 IX86_BUILTIN_PEXPANDQ512Z,
29002 IX86_BUILTIN_PEXPANDQLOAD512,
29003 IX86_BUILTIN_PEXPANDQLOAD512Z,
29004 IX86_BUILTIN_PMAXSD512,
29005 IX86_BUILTIN_PMAXSQ512,
29006 IX86_BUILTIN_PMAXUD512,
29007 IX86_BUILTIN_PMAXUQ512,
29008 IX86_BUILTIN_PMINSD512,
29009 IX86_BUILTIN_PMINSQ512,
29010 IX86_BUILTIN_PMINUD512,
29011 IX86_BUILTIN_PMINUQ512,
29012 IX86_BUILTIN_PMOVDB512,
29013 IX86_BUILTIN_PMOVDB512_MEM,
29014 IX86_BUILTIN_PMOVDW512,
29015 IX86_BUILTIN_PMOVDW512_MEM,
29016 IX86_BUILTIN_PMOVQB512,
29017 IX86_BUILTIN_PMOVQB512_MEM,
29018 IX86_BUILTIN_PMOVQD512,
29019 IX86_BUILTIN_PMOVQD512_MEM,
29020 IX86_BUILTIN_PMOVQW512,
29021 IX86_BUILTIN_PMOVQW512_MEM,
29022 IX86_BUILTIN_PMOVSDB512,
29023 IX86_BUILTIN_PMOVSDB512_MEM,
29024 IX86_BUILTIN_PMOVSDW512,
29025 IX86_BUILTIN_PMOVSDW512_MEM,
29026 IX86_BUILTIN_PMOVSQB512,
29027 IX86_BUILTIN_PMOVSQB512_MEM,
29028 IX86_BUILTIN_PMOVSQD512,
29029 IX86_BUILTIN_PMOVSQD512_MEM,
29030 IX86_BUILTIN_PMOVSQW512,
29031 IX86_BUILTIN_PMOVSQW512_MEM,
29032 IX86_BUILTIN_PMOVSXBD512,
29033 IX86_BUILTIN_PMOVSXBQ512,
29034 IX86_BUILTIN_PMOVSXDQ512,
29035 IX86_BUILTIN_PMOVSXWD512,
29036 IX86_BUILTIN_PMOVSXWQ512,
29037 IX86_BUILTIN_PMOVUSDB512,
29038 IX86_BUILTIN_PMOVUSDB512_MEM,
29039 IX86_BUILTIN_PMOVUSDW512,
29040 IX86_BUILTIN_PMOVUSDW512_MEM,
29041 IX86_BUILTIN_PMOVUSQB512,
29042 IX86_BUILTIN_PMOVUSQB512_MEM,
29043 IX86_BUILTIN_PMOVUSQD512,
29044 IX86_BUILTIN_PMOVUSQD512_MEM,
29045 IX86_BUILTIN_PMOVUSQW512,
29046 IX86_BUILTIN_PMOVUSQW512_MEM,
29047 IX86_BUILTIN_PMOVZXBD512,
29048 IX86_BUILTIN_PMOVZXBQ512,
29049 IX86_BUILTIN_PMOVZXDQ512,
29050 IX86_BUILTIN_PMOVZXWD512,
29051 IX86_BUILTIN_PMOVZXWQ512,
29052 IX86_BUILTIN_PMULDQ512,
29053 IX86_BUILTIN_PMULLD512,
29054 IX86_BUILTIN_PMULUDQ512,
29055 IX86_BUILTIN_PORD512,
29056 IX86_BUILTIN_PORQ512,
29057 IX86_BUILTIN_PROLD512,
29058 IX86_BUILTIN_PROLQ512,
29059 IX86_BUILTIN_PROLVD512,
29060 IX86_BUILTIN_PROLVQ512,
29061 IX86_BUILTIN_PRORD512,
29062 IX86_BUILTIN_PRORQ512,
29063 IX86_BUILTIN_PRORVD512,
29064 IX86_BUILTIN_PRORVQ512,
29065 IX86_BUILTIN_PSHUFD512,
29066 IX86_BUILTIN_PSLLD512,
29067 IX86_BUILTIN_PSLLDI512,
29068 IX86_BUILTIN_PSLLQ512,
29069 IX86_BUILTIN_PSLLQI512,
29070 IX86_BUILTIN_PSLLVV16SI,
29071 IX86_BUILTIN_PSLLVV8DI,
29072 IX86_BUILTIN_PSRAD512,
29073 IX86_BUILTIN_PSRADI512,
29074 IX86_BUILTIN_PSRAQ512,
29075 IX86_BUILTIN_PSRAQI512,
29076 IX86_BUILTIN_PSRAVV16SI,
29077 IX86_BUILTIN_PSRAVV8DI,
29078 IX86_BUILTIN_PSRLD512,
29079 IX86_BUILTIN_PSRLDI512,
29080 IX86_BUILTIN_PSRLQ512,
29081 IX86_BUILTIN_PSRLQI512,
29082 IX86_BUILTIN_PSRLVV16SI,
29083 IX86_BUILTIN_PSRLVV8DI,
29084 IX86_BUILTIN_PSUBD512,
29085 IX86_BUILTIN_PSUBQ512,
29086 IX86_BUILTIN_PTESTMD512,
29087 IX86_BUILTIN_PTESTMQ512,
29088 IX86_BUILTIN_PTESTNMD512,
29089 IX86_BUILTIN_PTESTNMQ512,
29090 IX86_BUILTIN_PUNPCKHDQ512,
29091 IX86_BUILTIN_PUNPCKHQDQ512,
29092 IX86_BUILTIN_PUNPCKLDQ512,
29093 IX86_BUILTIN_PUNPCKLQDQ512,
29094 IX86_BUILTIN_PXORD512,
29095 IX86_BUILTIN_PXORQ512,
29096 IX86_BUILTIN_RCP14PD512,
29097 IX86_BUILTIN_RCP14PS512,
29098 IX86_BUILTIN_RCP14SD,
29099 IX86_BUILTIN_RCP14SS,
29100 IX86_BUILTIN_RNDSCALEPD,
29101 IX86_BUILTIN_RNDSCALEPS,
29102 IX86_BUILTIN_RNDSCALESD,
29103 IX86_BUILTIN_RNDSCALESS,
29104 IX86_BUILTIN_RSQRT14PD512,
29105 IX86_BUILTIN_RSQRT14PS512,
29106 IX86_BUILTIN_RSQRT14SD,
29107 IX86_BUILTIN_RSQRT14SS,
29108 IX86_BUILTIN_SCALEFPD512,
29109 IX86_BUILTIN_SCALEFPS512,
29110 IX86_BUILTIN_SCALEFSD,
29111 IX86_BUILTIN_SCALEFSS,
29112 IX86_BUILTIN_SHUFPD512,
29113 IX86_BUILTIN_SHUFPS512,
29114 IX86_BUILTIN_SHUF_F32x4,
29115 IX86_BUILTIN_SHUF_F64x2,
29116 IX86_BUILTIN_SHUF_I32x4,
29117 IX86_BUILTIN_SHUF_I64x2,
29118 IX86_BUILTIN_SQRTPD512,
29119 IX86_BUILTIN_SQRTPD512_MASK,
29120 IX86_BUILTIN_SQRTPS512_MASK,
29121 IX86_BUILTIN_SQRTPS_NR512,
29122 IX86_BUILTIN_SQRTSD_ROUND,
29123 IX86_BUILTIN_SQRTSS_ROUND,
29124 IX86_BUILTIN_STOREAPD512,
29125 IX86_BUILTIN_STOREAPS512,
29126 IX86_BUILTIN_STOREDQUDI512,
29127 IX86_BUILTIN_STOREDQUSI512,
29128 IX86_BUILTIN_STOREUPD512,
29129 IX86_BUILTIN_STOREUPS512,
29130 IX86_BUILTIN_SUBPD512,
29131 IX86_BUILTIN_SUBPS512,
29132 IX86_BUILTIN_SUBSD_ROUND,
29133 IX86_BUILTIN_SUBSS_ROUND,
29134 IX86_BUILTIN_UCMPD512,
29135 IX86_BUILTIN_UCMPQ512,
29136 IX86_BUILTIN_UNPCKHPD512,
29137 IX86_BUILTIN_UNPCKHPS512,
29138 IX86_BUILTIN_UNPCKLPD512,
29139 IX86_BUILTIN_UNPCKLPS512,
29140 IX86_BUILTIN_VCVTSD2SI32,
29141 IX86_BUILTIN_VCVTSD2SI64,
29142 IX86_BUILTIN_VCVTSD2USI32,
29143 IX86_BUILTIN_VCVTSD2USI64,
29144 IX86_BUILTIN_VCVTSS2SI32,
29145 IX86_BUILTIN_VCVTSS2SI64,
29146 IX86_BUILTIN_VCVTSS2USI32,
29147 IX86_BUILTIN_VCVTSS2USI64,
29148 IX86_BUILTIN_VCVTTSD2SI32,
29149 IX86_BUILTIN_VCVTTSD2SI64,
29150 IX86_BUILTIN_VCVTTSD2USI32,
29151 IX86_BUILTIN_VCVTTSD2USI64,
29152 IX86_BUILTIN_VCVTTSS2SI32,
29153 IX86_BUILTIN_VCVTTSS2SI64,
29154 IX86_BUILTIN_VCVTTSS2USI32,
29155 IX86_BUILTIN_VCVTTSS2USI64,
29156 IX86_BUILTIN_VFMADDPD512_MASK,
29157 IX86_BUILTIN_VFMADDPD512_MASK3,
29158 IX86_BUILTIN_VFMADDPD512_MASKZ,
29159 IX86_BUILTIN_VFMADDPS512_MASK,
29160 IX86_BUILTIN_VFMADDPS512_MASK3,
29161 IX86_BUILTIN_VFMADDPS512_MASKZ,
29162 IX86_BUILTIN_VFMADDSD3_ROUND,
29163 IX86_BUILTIN_VFMADDSS3_ROUND,
29164 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29165 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29166 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29167 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29168 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29169 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29170 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29171 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29172 IX86_BUILTIN_VFMSUBPD512_MASK3,
29173 IX86_BUILTIN_VFMSUBPS512_MASK3,
29174 IX86_BUILTIN_VFMSUBSD3_MASK3,
29175 IX86_BUILTIN_VFMSUBSS3_MASK3,
29176 IX86_BUILTIN_VFNMADDPD512_MASK,
29177 IX86_BUILTIN_VFNMADDPS512_MASK,
29178 IX86_BUILTIN_VFNMSUBPD512_MASK,
29179 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29180 IX86_BUILTIN_VFNMSUBPS512_MASK,
29181 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29182 IX86_BUILTIN_VPCLZCNTD512,
29183 IX86_BUILTIN_VPCLZCNTQ512,
29184 IX86_BUILTIN_VPCONFLICTD512,
29185 IX86_BUILTIN_VPCONFLICTQ512,
29186 IX86_BUILTIN_VPERMDF512,
29187 IX86_BUILTIN_VPERMDI512,
29188 IX86_BUILTIN_VPERMI2VARD512,
29189 IX86_BUILTIN_VPERMI2VARPD512,
29190 IX86_BUILTIN_VPERMI2VARPS512,
29191 IX86_BUILTIN_VPERMI2VARQ512,
29192 IX86_BUILTIN_VPERMILPD512,
29193 IX86_BUILTIN_VPERMILPS512,
29194 IX86_BUILTIN_VPERMILVARPD512,
29195 IX86_BUILTIN_VPERMILVARPS512,
29196 IX86_BUILTIN_VPERMT2VARD512,
29197 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29198 IX86_BUILTIN_VPERMT2VARPD512,
29199 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29200 IX86_BUILTIN_VPERMT2VARPS512,
29201 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29202 IX86_BUILTIN_VPERMT2VARQ512,
29203 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29204 IX86_BUILTIN_VPERMVARDF512,
29205 IX86_BUILTIN_VPERMVARDI512,
29206 IX86_BUILTIN_VPERMVARSF512,
29207 IX86_BUILTIN_VPERMVARSI512,
29208 IX86_BUILTIN_VTERNLOGD512_MASK,
29209 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29210 IX86_BUILTIN_VTERNLOGQ512_MASK,
29211 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29213 /* Mask arithmetic operations.  */
29214 IX86_BUILTIN_KAND16,
29215 IX86_BUILTIN_KANDN16,
29216 IX86_BUILTIN_KNOT16,
29217 IX86_BUILTIN_KOR16,
29218 IX86_BUILTIN_KORTESTC16,
29219 IX86_BUILTIN_KORTESTZ16,
29220 IX86_BUILTIN_KUNPCKBW,
29221 IX86_BUILTIN_KXNOR16,
29222 IX86_BUILTIN_KXOR16,
29223 IX86_BUILTIN_KMOV16,
29225 /* AVX512VL. */
29226 IX86_BUILTIN_PMOVUSQD256_MEM,
29227 IX86_BUILTIN_PMOVUSQD128_MEM,
29228 IX86_BUILTIN_PMOVSQD256_MEM,
29229 IX86_BUILTIN_PMOVSQD128_MEM,
29230 IX86_BUILTIN_PMOVQD256_MEM,
29231 IX86_BUILTIN_PMOVQD128_MEM,
29232 IX86_BUILTIN_PMOVUSQW256_MEM,
29233 IX86_BUILTIN_PMOVUSQW128_MEM,
29234 IX86_BUILTIN_PMOVSQW256_MEM,
29235 IX86_BUILTIN_PMOVSQW128_MEM,
29236 IX86_BUILTIN_PMOVQW256_MEM,
29237 IX86_BUILTIN_PMOVQW128_MEM,
29238 IX86_BUILTIN_PMOVUSQB256_MEM,
29239 IX86_BUILTIN_PMOVUSQB128_MEM,
29240 IX86_BUILTIN_PMOVSQB256_MEM,
29241 IX86_BUILTIN_PMOVSQB128_MEM,
29242 IX86_BUILTIN_PMOVQB256_MEM,
29243 IX86_BUILTIN_PMOVQB128_MEM,
29244 IX86_BUILTIN_PMOVUSDW256_MEM,
29245 IX86_BUILTIN_PMOVUSDW128_MEM,
29246 IX86_BUILTIN_PMOVSDW256_MEM,
29247 IX86_BUILTIN_PMOVSDW128_MEM,
29248 IX86_BUILTIN_PMOVDW256_MEM,
29249 IX86_BUILTIN_PMOVDW128_MEM,
29250 IX86_BUILTIN_PMOVUSDB256_MEM,
29251 IX86_BUILTIN_PMOVUSDB128_MEM,
29252 IX86_BUILTIN_PMOVSDB256_MEM,
29253 IX86_BUILTIN_PMOVSDB128_MEM,
29254 IX86_BUILTIN_PMOVDB256_MEM,
29255 IX86_BUILTIN_PMOVDB128_MEM,
29256 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29257 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29258 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29259 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29260 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29261 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29262 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29263 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29264 IX86_BUILTIN_LOADAPD256_MASK,
29265 IX86_BUILTIN_LOADAPD128_MASK,
29266 IX86_BUILTIN_LOADAPS256_MASK,
29267 IX86_BUILTIN_LOADAPS128_MASK,
29268 IX86_BUILTIN_STOREAPD256_MASK,
29269 IX86_BUILTIN_STOREAPD128_MASK,
29270 IX86_BUILTIN_STOREAPS256_MASK,
29271 IX86_BUILTIN_STOREAPS128_MASK,
29272 IX86_BUILTIN_LOADUPD256_MASK,
29273 IX86_BUILTIN_LOADUPD128_MASK,
29274 IX86_BUILTIN_LOADUPS256_MASK,
29275 IX86_BUILTIN_LOADUPS128_MASK,
29276 IX86_BUILTIN_STOREUPD256_MASK,
29277 IX86_BUILTIN_STOREUPD128_MASK,
29278 IX86_BUILTIN_STOREUPS256_MASK,
29279 IX86_BUILTIN_STOREUPS128_MASK,
29280 IX86_BUILTIN_LOADDQUDI256_MASK,
29281 IX86_BUILTIN_LOADDQUDI128_MASK,
29282 IX86_BUILTIN_LOADDQUSI256_MASK,
29283 IX86_BUILTIN_LOADDQUSI128_MASK,
29284 IX86_BUILTIN_LOADDQUHI256_MASK,
29285 IX86_BUILTIN_LOADDQUHI128_MASK,
29286 IX86_BUILTIN_LOADDQUQI256_MASK,
29287 IX86_BUILTIN_LOADDQUQI128_MASK,
29288 IX86_BUILTIN_STOREDQUDI256_MASK,
29289 IX86_BUILTIN_STOREDQUDI128_MASK,
29290 IX86_BUILTIN_STOREDQUSI256_MASK,
29291 IX86_BUILTIN_STOREDQUSI128_MASK,
29292 IX86_BUILTIN_STOREDQUHI256_MASK,
29293 IX86_BUILTIN_STOREDQUHI128_MASK,
29294 IX86_BUILTIN_STOREDQUQI256_MASK,
29295 IX86_BUILTIN_STOREDQUQI128_MASK,
29296 IX86_BUILTIN_COMPRESSPDSTORE256,
29297 IX86_BUILTIN_COMPRESSPDSTORE128,
29298 IX86_BUILTIN_COMPRESSPSSTORE256,
29299 IX86_BUILTIN_COMPRESSPSSTORE128,
29300 IX86_BUILTIN_PCOMPRESSQSTORE256,
29301 IX86_BUILTIN_PCOMPRESSQSTORE128,
29302 IX86_BUILTIN_PCOMPRESSDSTORE256,
29303 IX86_BUILTIN_PCOMPRESSDSTORE128,
29304 IX86_BUILTIN_EXPANDPDLOAD256,
29305 IX86_BUILTIN_EXPANDPDLOAD128,
29306 IX86_BUILTIN_EXPANDPSLOAD256,
29307 IX86_BUILTIN_EXPANDPSLOAD128,
29308 IX86_BUILTIN_PEXPANDQLOAD256,
29309 IX86_BUILTIN_PEXPANDQLOAD128,
29310 IX86_BUILTIN_PEXPANDDLOAD256,
29311 IX86_BUILTIN_PEXPANDDLOAD128,
29312 IX86_BUILTIN_EXPANDPDLOAD256Z,
29313 IX86_BUILTIN_EXPANDPDLOAD128Z,
29314 IX86_BUILTIN_EXPANDPSLOAD256Z,
29315 IX86_BUILTIN_EXPANDPSLOAD128Z,
29316 IX86_BUILTIN_PEXPANDQLOAD256Z,
29317 IX86_BUILTIN_PEXPANDQLOAD128Z,
29318 IX86_BUILTIN_PEXPANDDLOAD256Z,
29319 IX86_BUILTIN_PEXPANDDLOAD128Z,
29320 IX86_BUILTIN_PALIGNR256_MASK,
29321 IX86_BUILTIN_PALIGNR128_MASK,
29322 IX86_BUILTIN_MOVDQA64_256_MASK,
29323 IX86_BUILTIN_MOVDQA64_128_MASK,
29324 IX86_BUILTIN_MOVDQA32_256_MASK,
29325 IX86_BUILTIN_MOVDQA32_128_MASK,
29326 IX86_BUILTIN_MOVAPD256_MASK,
29327 IX86_BUILTIN_MOVAPD128_MASK,
29328 IX86_BUILTIN_MOVAPS256_MASK,
29329 IX86_BUILTIN_MOVAPS128_MASK,
29330 IX86_BUILTIN_MOVDQUHI256_MASK,
29331 IX86_BUILTIN_MOVDQUHI128_MASK,
29332 IX86_BUILTIN_MOVDQUQI256_MASK,
29333 IX86_BUILTIN_MOVDQUQI128_MASK,
29334 IX86_BUILTIN_MINPS128_MASK,
29335 IX86_BUILTIN_MAXPS128_MASK,
29336 IX86_BUILTIN_MINPD128_MASK,
29337 IX86_BUILTIN_MAXPD128_MASK,
29338 IX86_BUILTIN_MAXPD256_MASK,
29339 IX86_BUILTIN_MAXPS256_MASK,
29340 IX86_BUILTIN_MINPD256_MASK,
29341 IX86_BUILTIN_MINPS256_MASK,
29342 IX86_BUILTIN_MULPS128_MASK,
29343 IX86_BUILTIN_DIVPS128_MASK,
29344 IX86_BUILTIN_MULPD128_MASK,
29345 IX86_BUILTIN_DIVPD128_MASK,
29346 IX86_BUILTIN_DIVPD256_MASK,
29347 IX86_BUILTIN_DIVPS256_MASK,
29348 IX86_BUILTIN_MULPD256_MASK,
29349 IX86_BUILTIN_MULPS256_MASK,
29350 IX86_BUILTIN_ADDPD128_MASK,
29351 IX86_BUILTIN_ADDPD256_MASK,
29352 IX86_BUILTIN_ADDPS128_MASK,
29353 IX86_BUILTIN_ADDPS256_MASK,
29354 IX86_BUILTIN_SUBPD128_MASK,
29355 IX86_BUILTIN_SUBPD256_MASK,
29356 IX86_BUILTIN_SUBPS128_MASK,
29357 IX86_BUILTIN_SUBPS256_MASK,
29358 IX86_BUILTIN_XORPD256_MASK,
29359 IX86_BUILTIN_XORPD128_MASK,
29360 IX86_BUILTIN_XORPS256_MASK,
29361 IX86_BUILTIN_XORPS128_MASK,
29362 IX86_BUILTIN_ORPD256_MASK,
29363 IX86_BUILTIN_ORPD128_MASK,
29364 IX86_BUILTIN_ORPS256_MASK,
29365 IX86_BUILTIN_ORPS128_MASK,
29366 IX86_BUILTIN_BROADCASTF32x2_256,
29367 IX86_BUILTIN_BROADCASTI32x2_256,
29368 IX86_BUILTIN_BROADCASTI32x2_128,
29369 IX86_BUILTIN_BROADCASTF64X2_256,
29370 IX86_BUILTIN_BROADCASTI64X2_256,
29371 IX86_BUILTIN_BROADCASTF32X4_256,
29372 IX86_BUILTIN_BROADCASTI32X4_256,
29373 IX86_BUILTIN_EXTRACTF32X4_256,
29374 IX86_BUILTIN_EXTRACTI32X4_256,
29375 IX86_BUILTIN_DBPSADBW256,
29376 IX86_BUILTIN_DBPSADBW128,
29377 IX86_BUILTIN_CVTTPD2QQ256,
29378 IX86_BUILTIN_CVTTPD2QQ128,
29379 IX86_BUILTIN_CVTTPD2UQQ256,
29380 IX86_BUILTIN_CVTTPD2UQQ128,
29381 IX86_BUILTIN_CVTPD2QQ256,
29382 IX86_BUILTIN_CVTPD2QQ128,
29383 IX86_BUILTIN_CVTPD2UQQ256,
29384 IX86_BUILTIN_CVTPD2UQQ128,
29385 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29386 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29387 IX86_BUILTIN_CVTTPS2QQ256,
29388 IX86_BUILTIN_CVTTPS2QQ128,
29389 IX86_BUILTIN_CVTTPS2UQQ256,
29390 IX86_BUILTIN_CVTTPS2UQQ128,
29391 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29392 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29393 IX86_BUILTIN_CVTTPS2UDQ256,
29394 IX86_BUILTIN_CVTTPS2UDQ128,
29395 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29396 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29397 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29398 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29399 IX86_BUILTIN_CVTPD2DQ256_MASK,
29400 IX86_BUILTIN_CVTPD2DQ128_MASK,
29401 IX86_BUILTIN_CVTDQ2PD256_MASK,
29402 IX86_BUILTIN_CVTDQ2PD128_MASK,
29403 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29404 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29405 IX86_BUILTIN_CVTDQ2PS256_MASK,
29406 IX86_BUILTIN_CVTDQ2PS128_MASK,
29407 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29408 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29409 IX86_BUILTIN_CVTPS2PD256_MASK,
29410 IX86_BUILTIN_CVTPS2PD128_MASK,
29411 IX86_BUILTIN_PBROADCASTB256_MASK,
29412 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29413 IX86_BUILTIN_PBROADCASTB128_MASK,
29414 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29415 IX86_BUILTIN_PBROADCASTW256_MASK,
29416 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29417 IX86_BUILTIN_PBROADCASTW128_MASK,
29418 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29419 IX86_BUILTIN_PBROADCASTD256_MASK,
29420 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29421 IX86_BUILTIN_PBROADCASTD128_MASK,
29422 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29423 IX86_BUILTIN_PBROADCASTQ256_MASK,
29424 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29425 IX86_BUILTIN_PBROADCASTQ128_MASK,
29426 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29427 IX86_BUILTIN_BROADCASTSS256,
29428 IX86_BUILTIN_BROADCASTSS128,
29429 IX86_BUILTIN_BROADCASTSD256,
29430 IX86_BUILTIN_EXTRACTF64X2_256,
29431 IX86_BUILTIN_EXTRACTI64X2_256,
29432 IX86_BUILTIN_INSERTF32X4_256,
29433 IX86_BUILTIN_INSERTI32X4_256,
29434 IX86_BUILTIN_PMOVSXBW256_MASK,
29435 IX86_BUILTIN_PMOVSXBW128_MASK,
29436 IX86_BUILTIN_PMOVSXBD256_MASK,
29437 IX86_BUILTIN_PMOVSXBD128_MASK,
29438 IX86_BUILTIN_PMOVSXBQ256_MASK,
29439 IX86_BUILTIN_PMOVSXBQ128_MASK,
29440 IX86_BUILTIN_PMOVSXWD256_MASK,
29441 IX86_BUILTIN_PMOVSXWD128_MASK,
29442 IX86_BUILTIN_PMOVSXWQ256_MASK,
29443 IX86_BUILTIN_PMOVSXWQ128_MASK,
29444 IX86_BUILTIN_PMOVSXDQ256_MASK,
29445 IX86_BUILTIN_PMOVSXDQ128_MASK,
29446 IX86_BUILTIN_PMOVZXBW256_MASK,
29447 IX86_BUILTIN_PMOVZXBW128_MASK,
29448 IX86_BUILTIN_PMOVZXBD256_MASK,
29449 IX86_BUILTIN_PMOVZXBD128_MASK,
29450 IX86_BUILTIN_PMOVZXBQ256_MASK,
29451 IX86_BUILTIN_PMOVZXBQ128_MASK,
29452 IX86_BUILTIN_PMOVZXWD256_MASK,
29453 IX86_BUILTIN_PMOVZXWD128_MASK,
29454 IX86_BUILTIN_PMOVZXWQ256_MASK,
29455 IX86_BUILTIN_PMOVZXWQ128_MASK,
29456 IX86_BUILTIN_PMOVZXDQ256_MASK,
29457 IX86_BUILTIN_PMOVZXDQ128_MASK,
29458 IX86_BUILTIN_REDUCEPD256_MASK,
29459 IX86_BUILTIN_REDUCEPD128_MASK,
29460 IX86_BUILTIN_REDUCEPS256_MASK,
29461 IX86_BUILTIN_REDUCEPS128_MASK,
29462 IX86_BUILTIN_REDUCESD_MASK,
29463 IX86_BUILTIN_REDUCESS_MASK,
29464 IX86_BUILTIN_VPERMVARHI256_MASK,
29465 IX86_BUILTIN_VPERMVARHI128_MASK,
29466 IX86_BUILTIN_VPERMT2VARHI256,
29467 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29468 IX86_BUILTIN_VPERMT2VARHI128,
29469 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29470 IX86_BUILTIN_VPERMI2VARHI256,
29471 IX86_BUILTIN_VPERMI2VARHI128,
29472 IX86_BUILTIN_RCP14PD256,
29473 IX86_BUILTIN_RCP14PD128,
29474 IX86_BUILTIN_RCP14PS256,
29475 IX86_BUILTIN_RCP14PS128,
29476 IX86_BUILTIN_RSQRT14PD256_MASK,
29477 IX86_BUILTIN_RSQRT14PD128_MASK,
29478 IX86_BUILTIN_RSQRT14PS256_MASK,
29479 IX86_BUILTIN_RSQRT14PS128_MASK,
29480 IX86_BUILTIN_SQRTPD256_MASK,
29481 IX86_BUILTIN_SQRTPD128_MASK,
29482 IX86_BUILTIN_SQRTPS256_MASK,
29483 IX86_BUILTIN_SQRTPS128_MASK,
29484 IX86_BUILTIN_PADDB128_MASK,
29485 IX86_BUILTIN_PADDW128_MASK,
29486 IX86_BUILTIN_PADDD128_MASK,
29487 IX86_BUILTIN_PADDQ128_MASK,
29488 IX86_BUILTIN_PSUBB128_MASK,
29489 IX86_BUILTIN_PSUBW128_MASK,
29490 IX86_BUILTIN_PSUBD128_MASK,
29491 IX86_BUILTIN_PSUBQ128_MASK,
29492 IX86_BUILTIN_PADDSB128_MASK,
29493 IX86_BUILTIN_PADDSW128_MASK,
29494 IX86_BUILTIN_PSUBSB128_MASK,
29495 IX86_BUILTIN_PSUBSW128_MASK,
29496 IX86_BUILTIN_PADDUSB128_MASK,
29497 IX86_BUILTIN_PADDUSW128_MASK,
29498 IX86_BUILTIN_PSUBUSB128_MASK,
29499 IX86_BUILTIN_PSUBUSW128_MASK,
29500 IX86_BUILTIN_PADDB256_MASK,
29501 IX86_BUILTIN_PADDW256_MASK,
29502 IX86_BUILTIN_PADDD256_MASK,
29503 IX86_BUILTIN_PADDQ256_MASK,
29504 IX86_BUILTIN_PADDSB256_MASK,
29505 IX86_BUILTIN_PADDSW256_MASK,
29506 IX86_BUILTIN_PADDUSB256_MASK,
29507 IX86_BUILTIN_PADDUSW256_MASK,
29508 IX86_BUILTIN_PSUBB256_MASK,
29509 IX86_BUILTIN_PSUBW256_MASK,
29510 IX86_BUILTIN_PSUBD256_MASK,
29511 IX86_BUILTIN_PSUBQ256_MASK,
29512 IX86_BUILTIN_PSUBSB256_MASK,
29513 IX86_BUILTIN_PSUBSW256_MASK,
29514 IX86_BUILTIN_PSUBUSB256_MASK,
29515 IX86_BUILTIN_PSUBUSW256_MASK,
29516 IX86_BUILTIN_SHUF_F64x2_256,
29517 IX86_BUILTIN_SHUF_I64x2_256,
29518 IX86_BUILTIN_SHUF_I32x4_256,
29519 IX86_BUILTIN_SHUF_F32x4_256,
29520 IX86_BUILTIN_PMOVWB128,
29521 IX86_BUILTIN_PMOVWB256,
29522 IX86_BUILTIN_PMOVSWB128,
29523 IX86_BUILTIN_PMOVSWB256,
29524 IX86_BUILTIN_PMOVUSWB128,
29525 IX86_BUILTIN_PMOVUSWB256,
29526 IX86_BUILTIN_PMOVDB128,
29527 IX86_BUILTIN_PMOVDB256,
29528 IX86_BUILTIN_PMOVSDB128,
29529 IX86_BUILTIN_PMOVSDB256,
29530 IX86_BUILTIN_PMOVUSDB128,
29531 IX86_BUILTIN_PMOVUSDB256,
29532 IX86_BUILTIN_PMOVDW128,
29533 IX86_BUILTIN_PMOVDW256,
29534 IX86_BUILTIN_PMOVSDW128,
29535 IX86_BUILTIN_PMOVSDW256,
29536 IX86_BUILTIN_PMOVUSDW128,
29537 IX86_BUILTIN_PMOVUSDW256,
29538 IX86_BUILTIN_PMOVQB128,
29539 IX86_BUILTIN_PMOVQB256,
29540 IX86_BUILTIN_PMOVSQB128,
29541 IX86_BUILTIN_PMOVSQB256,
29542 IX86_BUILTIN_PMOVUSQB128,
29543 IX86_BUILTIN_PMOVUSQB256,
29544 IX86_BUILTIN_PMOVQW128,
29545 IX86_BUILTIN_PMOVQW256,
29546 IX86_BUILTIN_PMOVSQW128,
29547 IX86_BUILTIN_PMOVSQW256,
29548 IX86_BUILTIN_PMOVUSQW128,
29549 IX86_BUILTIN_PMOVUSQW256,
29550 IX86_BUILTIN_PMOVQD128,
29551 IX86_BUILTIN_PMOVQD256,
29552 IX86_BUILTIN_PMOVSQD128,
29553 IX86_BUILTIN_PMOVSQD256,
29554 IX86_BUILTIN_PMOVUSQD128,
29555 IX86_BUILTIN_PMOVUSQD256,
29556 IX86_BUILTIN_RANGEPD256,
29557 IX86_BUILTIN_RANGEPD128,
29558 IX86_BUILTIN_RANGEPS256,
29559 IX86_BUILTIN_RANGEPS128,
29560 IX86_BUILTIN_GETEXPPS256,
29561 IX86_BUILTIN_GETEXPPD256,
29562 IX86_BUILTIN_GETEXPPS128,
29563 IX86_BUILTIN_GETEXPPD128,
29564 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29565 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29566 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29567 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29568 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29569 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29570 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29571 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29572 IX86_BUILTIN_PABSQ256,
29573 IX86_BUILTIN_PABSQ128,
29574 IX86_BUILTIN_PABSD256_MASK,
29575 IX86_BUILTIN_PABSD128_MASK,
29576 IX86_BUILTIN_PMULHRSW256_MASK,
29577 IX86_BUILTIN_PMULHRSW128_MASK,
29578 IX86_BUILTIN_PMULHUW128_MASK,
29579 IX86_BUILTIN_PMULHUW256_MASK,
29580 IX86_BUILTIN_PMULHW256_MASK,
29581 IX86_BUILTIN_PMULHW128_MASK,
29582 IX86_BUILTIN_PMULLW256_MASK,
29583 IX86_BUILTIN_PMULLW128_MASK,
29584 IX86_BUILTIN_PMULLQ256,
29585 IX86_BUILTIN_PMULLQ128,
29586 IX86_BUILTIN_ANDPD256_MASK,
29587 IX86_BUILTIN_ANDPD128_MASK,
29588 IX86_BUILTIN_ANDPS256_MASK,
29589 IX86_BUILTIN_ANDPS128_MASK,
29590 IX86_BUILTIN_ANDNPD256_MASK,
29591 IX86_BUILTIN_ANDNPD128_MASK,
29592 IX86_BUILTIN_ANDNPS256_MASK,
29593 IX86_BUILTIN_ANDNPS128_MASK,
29594 IX86_BUILTIN_PSLLWI128_MASK,
29595 IX86_BUILTIN_PSLLDI128_MASK,
29596 IX86_BUILTIN_PSLLQI128_MASK,
29597 IX86_BUILTIN_PSLLW128_MASK,
29598 IX86_BUILTIN_PSLLD128_MASK,
29599 IX86_BUILTIN_PSLLQ128_MASK,
29600 IX86_BUILTIN_PSLLWI256_MASK,
29601 IX86_BUILTIN_PSLLW256_MASK,
29602 IX86_BUILTIN_PSLLDI256_MASK,
29603 IX86_BUILTIN_PSLLD256_MASK,
29604 IX86_BUILTIN_PSLLQI256_MASK,
29605 IX86_BUILTIN_PSLLQ256_MASK,
29606 IX86_BUILTIN_PSRADI128_MASK,
29607 IX86_BUILTIN_PSRAD128_MASK,
29608 IX86_BUILTIN_PSRADI256_MASK,
29609 IX86_BUILTIN_PSRAD256_MASK,
29610 IX86_BUILTIN_PSRAQI128_MASK,
29611 IX86_BUILTIN_PSRAQ128_MASK,
29612 IX86_BUILTIN_PSRAQI256_MASK,
29613 IX86_BUILTIN_PSRAQ256_MASK,
29614 IX86_BUILTIN_PANDD256,
29615 IX86_BUILTIN_PANDD128,
29616 IX86_BUILTIN_PSRLDI128_MASK,
29617 IX86_BUILTIN_PSRLD128_MASK,
29618 IX86_BUILTIN_PSRLDI256_MASK,
29619 IX86_BUILTIN_PSRLD256_MASK,
29620 IX86_BUILTIN_PSRLQI128_MASK,
29621 IX86_BUILTIN_PSRLQ128_MASK,
29622 IX86_BUILTIN_PSRLQI256_MASK,
29623 IX86_BUILTIN_PSRLQ256_MASK,
29624 IX86_BUILTIN_PANDQ256,
29625 IX86_BUILTIN_PANDQ128,
29626 IX86_BUILTIN_PANDND256,
29627 IX86_BUILTIN_PANDND128,
29628 IX86_BUILTIN_PANDNQ256,
29629 IX86_BUILTIN_PANDNQ128,
29630 IX86_BUILTIN_PORD256,
29631 IX86_BUILTIN_PORD128,
29632 IX86_BUILTIN_PORQ256,
29633 IX86_BUILTIN_PORQ128,
29634 IX86_BUILTIN_PXORD256,
29635 IX86_BUILTIN_PXORD128,
29636 IX86_BUILTIN_PXORQ256,
29637 IX86_BUILTIN_PXORQ128,
29638 IX86_BUILTIN_PACKSSWB256_MASK,
29639 IX86_BUILTIN_PACKSSWB128_MASK,
29640 IX86_BUILTIN_PACKUSWB256_MASK,
29641 IX86_BUILTIN_PACKUSWB128_MASK,
29642 IX86_BUILTIN_RNDSCALEPS256,
29643 IX86_BUILTIN_RNDSCALEPD256,
29644 IX86_BUILTIN_RNDSCALEPS128,
29645 IX86_BUILTIN_RNDSCALEPD128,
29646 IX86_BUILTIN_VTERNLOGQ256_MASK,
29647 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29648 IX86_BUILTIN_VTERNLOGD256_MASK,
29649 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29650 IX86_BUILTIN_VTERNLOGQ128_MASK,
29651 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29652 IX86_BUILTIN_VTERNLOGD128_MASK,
29653 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29654 IX86_BUILTIN_SCALEFPD256,
29655 IX86_BUILTIN_SCALEFPS256,
29656 IX86_BUILTIN_SCALEFPD128,
29657 IX86_BUILTIN_SCALEFPS128,
29658 IX86_BUILTIN_VFMADDPD256_MASK,
29659 IX86_BUILTIN_VFMADDPD256_MASK3,
29660 IX86_BUILTIN_VFMADDPD256_MASKZ,
29661 IX86_BUILTIN_VFMADDPD128_MASK,
29662 IX86_BUILTIN_VFMADDPD128_MASK3,
29663 IX86_BUILTIN_VFMADDPD128_MASKZ,
29664 IX86_BUILTIN_VFMADDPS256_MASK,
29665 IX86_BUILTIN_VFMADDPS256_MASK3,
29666 IX86_BUILTIN_VFMADDPS256_MASKZ,
29667 IX86_BUILTIN_VFMADDPS128_MASK,
29668 IX86_BUILTIN_VFMADDPS128_MASK3,
29669 IX86_BUILTIN_VFMADDPS128_MASKZ,
29670 IX86_BUILTIN_VFMSUBPD256_MASK3,
29671 IX86_BUILTIN_VFMSUBPD128_MASK3,
29672 IX86_BUILTIN_VFMSUBPS256_MASK3,
29673 IX86_BUILTIN_VFMSUBPS128_MASK3,
29674 IX86_BUILTIN_VFNMADDPD256_MASK,
29675 IX86_BUILTIN_VFNMADDPD128_MASK,
29676 IX86_BUILTIN_VFNMADDPS256_MASK,
29677 IX86_BUILTIN_VFNMADDPS128_MASK,
29678 IX86_BUILTIN_VFNMSUBPD256_MASK,
29679 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29680 IX86_BUILTIN_VFNMSUBPD128_MASK,
29681 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29682 IX86_BUILTIN_VFNMSUBPS256_MASK,
29683 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29684 IX86_BUILTIN_VFNMSUBPS128_MASK,
29685 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29686 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29687 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29688 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29689 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29690 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29691 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29692 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29693 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29694 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29695 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29696 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29697 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29698 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29699 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29700 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29701 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29702 IX86_BUILTIN_INSERTF64X2_256,
29703 IX86_BUILTIN_INSERTI64X2_256,
29704 IX86_BUILTIN_PSRAVV16HI,
29705 IX86_BUILTIN_PSRAVV8HI,
29706 IX86_BUILTIN_PMADDUBSW256_MASK,
29707 IX86_BUILTIN_PMADDUBSW128_MASK,
29708 IX86_BUILTIN_PMADDWD256_MASK,
29709 IX86_BUILTIN_PMADDWD128_MASK,
29710 IX86_BUILTIN_PSRLVV16HI,
29711 IX86_BUILTIN_PSRLVV8HI,
29712 IX86_BUILTIN_CVTPS2DQ256_MASK,
29713 IX86_BUILTIN_CVTPS2DQ128_MASK,
29714 IX86_BUILTIN_CVTPS2UDQ256,
29715 IX86_BUILTIN_CVTPS2UDQ128,
29716 IX86_BUILTIN_CVTPS2QQ256,
29717 IX86_BUILTIN_CVTPS2QQ128,
29718 IX86_BUILTIN_CVTPS2UQQ256,
29719 IX86_BUILTIN_CVTPS2UQQ128,
29720 IX86_BUILTIN_GETMANTPS256,
29721 IX86_BUILTIN_GETMANTPS128,
29722 IX86_BUILTIN_GETMANTPD256,
29723 IX86_BUILTIN_GETMANTPD128,
29724 IX86_BUILTIN_MOVDDUP256_MASK,
29725 IX86_BUILTIN_MOVDDUP128_MASK,
29726 IX86_BUILTIN_MOVSHDUP256_MASK,
29727 IX86_BUILTIN_MOVSHDUP128_MASK,
29728 IX86_BUILTIN_MOVSLDUP256_MASK,
29729 IX86_BUILTIN_MOVSLDUP128_MASK,
29730 IX86_BUILTIN_CVTQQ2PS256,
29731 IX86_BUILTIN_CVTQQ2PS128,
29732 IX86_BUILTIN_CVTUQQ2PS256,
29733 IX86_BUILTIN_CVTUQQ2PS128,
29734 IX86_BUILTIN_CVTQQ2PD256,
29735 IX86_BUILTIN_CVTQQ2PD128,
29736 IX86_BUILTIN_CVTUQQ2PD256,
29737 IX86_BUILTIN_CVTUQQ2PD128,
29738 IX86_BUILTIN_VPERMT2VARQ256,
29739 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29740 IX86_BUILTIN_VPERMT2VARD256,
29741 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29742 IX86_BUILTIN_VPERMI2VARQ256,
29743 IX86_BUILTIN_VPERMI2VARD256,
29744 IX86_BUILTIN_VPERMT2VARPD256,
29745 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29746 IX86_BUILTIN_VPERMT2VARPS256,
29747 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29748 IX86_BUILTIN_VPERMI2VARPD256,
29749 IX86_BUILTIN_VPERMI2VARPS256,
29750 IX86_BUILTIN_VPERMT2VARQ128,
29751 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29752 IX86_BUILTIN_VPERMT2VARD128,
29753 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29754 IX86_BUILTIN_VPERMI2VARQ128,
29755 IX86_BUILTIN_VPERMI2VARD128,
29756 IX86_BUILTIN_VPERMT2VARPD128,
29757 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29758 IX86_BUILTIN_VPERMT2VARPS128,
29759 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29760 IX86_BUILTIN_VPERMI2VARPD128,
29761 IX86_BUILTIN_VPERMI2VARPS128,
29762 IX86_BUILTIN_PSHUFB256_MASK,
29763 IX86_BUILTIN_PSHUFB128_MASK,
29764 IX86_BUILTIN_PSHUFHW256_MASK,
29765 IX86_BUILTIN_PSHUFHW128_MASK,
29766 IX86_BUILTIN_PSHUFLW256_MASK,
29767 IX86_BUILTIN_PSHUFLW128_MASK,
29768 IX86_BUILTIN_PSHUFD256_MASK,
29769 IX86_BUILTIN_PSHUFD128_MASK,
29770 IX86_BUILTIN_SHUFPD256_MASK,
29771 IX86_BUILTIN_SHUFPD128_MASK,
29772 IX86_BUILTIN_SHUFPS256_MASK,
29773 IX86_BUILTIN_SHUFPS128_MASK,
29774 IX86_BUILTIN_PROLVQ256,
29775 IX86_BUILTIN_PROLVQ128,
29776 IX86_BUILTIN_PROLQ256,
29777 IX86_BUILTIN_PROLQ128,
29778 IX86_BUILTIN_PRORVQ256,
29779 IX86_BUILTIN_PRORVQ128,
29780 IX86_BUILTIN_PRORQ256,
29781 IX86_BUILTIN_PRORQ128,
29782 IX86_BUILTIN_PSRAVQ128,
29783 IX86_BUILTIN_PSRAVQ256,
29784 IX86_BUILTIN_PSLLVV4DI_MASK,
29785 IX86_BUILTIN_PSLLVV2DI_MASK,
29786 IX86_BUILTIN_PSLLVV8SI_MASK,
29787 IX86_BUILTIN_PSLLVV4SI_MASK,
29788 IX86_BUILTIN_PSRAVV8SI_MASK,
29789 IX86_BUILTIN_PSRAVV4SI_MASK,
29790 IX86_BUILTIN_PSRLVV4DI_MASK,
29791 IX86_BUILTIN_PSRLVV2DI_MASK,
29792 IX86_BUILTIN_PSRLVV8SI_MASK,
29793 IX86_BUILTIN_PSRLVV4SI_MASK,
29794 IX86_BUILTIN_PSRAWI256_MASK,
29795 IX86_BUILTIN_PSRAW256_MASK,
29796 IX86_BUILTIN_PSRAWI128_MASK,
29797 IX86_BUILTIN_PSRAW128_MASK,
29798 IX86_BUILTIN_PSRLWI256_MASK,
29799 IX86_BUILTIN_PSRLW256_MASK,
29800 IX86_BUILTIN_PSRLWI128_MASK,
29801 IX86_BUILTIN_PSRLW128_MASK,
29802 IX86_BUILTIN_PRORVD256,
29803 IX86_BUILTIN_PROLVD256,
29804 IX86_BUILTIN_PRORD256,
29805 IX86_BUILTIN_PROLD256,
29806 IX86_BUILTIN_PRORVD128,
29807 IX86_BUILTIN_PROLVD128,
29808 IX86_BUILTIN_PRORD128,
29809 IX86_BUILTIN_PROLD128,
29810 IX86_BUILTIN_FPCLASSPD256,
29811 IX86_BUILTIN_FPCLASSPD128,
29812 IX86_BUILTIN_FPCLASSSD,
29813 IX86_BUILTIN_FPCLASSPS256,
29814 IX86_BUILTIN_FPCLASSPS128,
29815 IX86_BUILTIN_FPCLASSSS,
29816 IX86_BUILTIN_CVTB2MASK128,
29817 IX86_BUILTIN_CVTB2MASK256,
29818 IX86_BUILTIN_CVTW2MASK128,
29819 IX86_BUILTIN_CVTW2MASK256,
29820 IX86_BUILTIN_CVTD2MASK128,
29821 IX86_BUILTIN_CVTD2MASK256,
29822 IX86_BUILTIN_CVTQ2MASK128,
29823 IX86_BUILTIN_CVTQ2MASK256,
29824 IX86_BUILTIN_CVTMASK2B128,
29825 IX86_BUILTIN_CVTMASK2B256,
29826 IX86_BUILTIN_CVTMASK2W128,
29827 IX86_BUILTIN_CVTMASK2W256,
29828 IX86_BUILTIN_CVTMASK2D128,
29829 IX86_BUILTIN_CVTMASK2D256,
29830 IX86_BUILTIN_CVTMASK2Q128,
29831 IX86_BUILTIN_CVTMASK2Q256,
29832 IX86_BUILTIN_PCMPEQB128_MASK,
29833 IX86_BUILTIN_PCMPEQB256_MASK,
29834 IX86_BUILTIN_PCMPEQW128_MASK,
29835 IX86_BUILTIN_PCMPEQW256_MASK,
29836 IX86_BUILTIN_PCMPEQD128_MASK,
29837 IX86_BUILTIN_PCMPEQD256_MASK,
29838 IX86_BUILTIN_PCMPEQQ128_MASK,
29839 IX86_BUILTIN_PCMPEQQ256_MASK,
29840 IX86_BUILTIN_PCMPGTB128_MASK,
29841 IX86_BUILTIN_PCMPGTB256_MASK,
29842 IX86_BUILTIN_PCMPGTW128_MASK,
29843 IX86_BUILTIN_PCMPGTW256_MASK,
29844 IX86_BUILTIN_PCMPGTD128_MASK,
29845 IX86_BUILTIN_PCMPGTD256_MASK,
29846 IX86_BUILTIN_PCMPGTQ128_MASK,
29847 IX86_BUILTIN_PCMPGTQ256_MASK,
29848 IX86_BUILTIN_PTESTMB128,
29849 IX86_BUILTIN_PTESTMB256,
29850 IX86_BUILTIN_PTESTMW128,
29851 IX86_BUILTIN_PTESTMW256,
29852 IX86_BUILTIN_PTESTMD128,
29853 IX86_BUILTIN_PTESTMD256,
29854 IX86_BUILTIN_PTESTMQ128,
29855 IX86_BUILTIN_PTESTMQ256,
29856 IX86_BUILTIN_PTESTNMB128,
29857 IX86_BUILTIN_PTESTNMB256,
29858 IX86_BUILTIN_PTESTNMW128,
29859 IX86_BUILTIN_PTESTNMW256,
29860 IX86_BUILTIN_PTESTNMD128,
29861 IX86_BUILTIN_PTESTNMD256,
29862 IX86_BUILTIN_PTESTNMQ128,
29863 IX86_BUILTIN_PTESTNMQ256,
29864 IX86_BUILTIN_PBROADCASTMB128,
29865 IX86_BUILTIN_PBROADCASTMB256,
29866 IX86_BUILTIN_PBROADCASTMW128,
29867 IX86_BUILTIN_PBROADCASTMW256,
29868 IX86_BUILTIN_COMPRESSPD256,
29869 IX86_BUILTIN_COMPRESSPD128,
29870 IX86_BUILTIN_COMPRESSPS256,
29871 IX86_BUILTIN_COMPRESSPS128,
29872 IX86_BUILTIN_PCOMPRESSQ256,
29873 IX86_BUILTIN_PCOMPRESSQ128,
29874 IX86_BUILTIN_PCOMPRESSD256,
29875 IX86_BUILTIN_PCOMPRESSD128,
29876 IX86_BUILTIN_EXPANDPD256,
29877 IX86_BUILTIN_EXPANDPD128,
29878 IX86_BUILTIN_EXPANDPS256,
29879 IX86_BUILTIN_EXPANDPS128,
29880 IX86_BUILTIN_PEXPANDQ256,
29881 IX86_BUILTIN_PEXPANDQ128,
29882 IX86_BUILTIN_PEXPANDD256,
29883 IX86_BUILTIN_PEXPANDD128,
29884 IX86_BUILTIN_EXPANDPD256Z,
29885 IX86_BUILTIN_EXPANDPD128Z,
29886 IX86_BUILTIN_EXPANDPS256Z,
29887 IX86_BUILTIN_EXPANDPS128Z,
29888 IX86_BUILTIN_PEXPANDQ256Z,
29889 IX86_BUILTIN_PEXPANDQ128Z,
29890 IX86_BUILTIN_PEXPANDD256Z,
29891 IX86_BUILTIN_PEXPANDD128Z,
29892 IX86_BUILTIN_PMAXSD256_MASK,
29893 IX86_BUILTIN_PMINSD256_MASK,
29894 IX86_BUILTIN_PMAXUD256_MASK,
29895 IX86_BUILTIN_PMINUD256_MASK,
29896 IX86_BUILTIN_PMAXSD128_MASK,
29897 IX86_BUILTIN_PMINSD128_MASK,
29898 IX86_BUILTIN_PMAXUD128_MASK,
29899 IX86_BUILTIN_PMINUD128_MASK,
29900 IX86_BUILTIN_PMAXSQ256_MASK,
29901 IX86_BUILTIN_PMINSQ256_MASK,
29902 IX86_BUILTIN_PMAXUQ256_MASK,
29903 IX86_BUILTIN_PMINUQ256_MASK,
29904 IX86_BUILTIN_PMAXSQ128_MASK,
29905 IX86_BUILTIN_PMINSQ128_MASK,
29906 IX86_BUILTIN_PMAXUQ128_MASK,
29907 IX86_BUILTIN_PMINUQ128_MASK,
29908 IX86_BUILTIN_PMINSB256_MASK,
29909 IX86_BUILTIN_PMINUB256_MASK,
29910 IX86_BUILTIN_PMAXSB256_MASK,
29911 IX86_BUILTIN_PMAXUB256_MASK,
29912 IX86_BUILTIN_PMINSB128_MASK,
29913 IX86_BUILTIN_PMINUB128_MASK,
29914 IX86_BUILTIN_PMAXSB128_MASK,
29915 IX86_BUILTIN_PMAXUB128_MASK,
29916 IX86_BUILTIN_PMINSW256_MASK,
29917 IX86_BUILTIN_PMINUW256_MASK,
29918 IX86_BUILTIN_PMAXSW256_MASK,
29919 IX86_BUILTIN_PMAXUW256_MASK,
29920 IX86_BUILTIN_PMINSW128_MASK,
29921 IX86_BUILTIN_PMINUW128_MASK,
29922 IX86_BUILTIN_PMAXSW128_MASK,
29923 IX86_BUILTIN_PMAXUW128_MASK,
29924 IX86_BUILTIN_VPCONFLICTQ256,
29925 IX86_BUILTIN_VPCONFLICTD256,
29926 IX86_BUILTIN_VPCLZCNTQ256,
29927 IX86_BUILTIN_VPCLZCNTD256,
29928 IX86_BUILTIN_UNPCKHPD256_MASK,
29929 IX86_BUILTIN_UNPCKHPD128_MASK,
29930 IX86_BUILTIN_UNPCKHPS256_MASK,
29931 IX86_BUILTIN_UNPCKHPS128_MASK,
29932 IX86_BUILTIN_UNPCKLPD256_MASK,
29933 IX86_BUILTIN_UNPCKLPD128_MASK,
29934 IX86_BUILTIN_UNPCKLPS256_MASK,
29935 IX86_BUILTIN_VPCONFLICTQ128,
29936 IX86_BUILTIN_VPCONFLICTD128,
29937 IX86_BUILTIN_VPCLZCNTQ128,
29938 IX86_BUILTIN_VPCLZCNTD128,
29939 IX86_BUILTIN_UNPCKLPS128_MASK,
29940 IX86_BUILTIN_ALIGND256,
29941 IX86_BUILTIN_ALIGNQ256,
29942 IX86_BUILTIN_ALIGND128,
29943 IX86_BUILTIN_ALIGNQ128,
29944 IX86_BUILTIN_CVTPS2PH256_MASK,
29945 IX86_BUILTIN_CVTPS2PH_MASK,
29946 IX86_BUILTIN_CVTPH2PS_MASK,
29947 IX86_BUILTIN_CVTPH2PS256_MASK,
29948 IX86_BUILTIN_PUNPCKHDQ128_MASK,
29949 IX86_BUILTIN_PUNPCKHDQ256_MASK,
29950 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
29951 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
29952 IX86_BUILTIN_PUNPCKLDQ128_MASK,
29953 IX86_BUILTIN_PUNPCKLDQ256_MASK,
29954 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
29955 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
29956 IX86_BUILTIN_PUNPCKHBW128_MASK,
29957 IX86_BUILTIN_PUNPCKHBW256_MASK,
29958 IX86_BUILTIN_PUNPCKHWD128_MASK,
29959 IX86_BUILTIN_PUNPCKHWD256_MASK,
29960 IX86_BUILTIN_PUNPCKLBW128_MASK,
29961 IX86_BUILTIN_PUNPCKLBW256_MASK,
29962 IX86_BUILTIN_PUNPCKLWD128_MASK,
29963 IX86_BUILTIN_PUNPCKLWD256_MASK,
29964 IX86_BUILTIN_PSLLVV16HI,
29965 IX86_BUILTIN_PSLLVV8HI,
29966 IX86_BUILTIN_PACKSSDW256_MASK,
29967 IX86_BUILTIN_PACKSSDW128_MASK,
29968 IX86_BUILTIN_PACKUSDW256_MASK,
29969 IX86_BUILTIN_PACKUSDW128_MASK,
29970 IX86_BUILTIN_PAVGB256_MASK,
29971 IX86_BUILTIN_PAVGW256_MASK,
29972 IX86_BUILTIN_PAVGB128_MASK,
29973 IX86_BUILTIN_PAVGW128_MASK,
29974 IX86_BUILTIN_VPERMVARSF256_MASK,
29975 IX86_BUILTIN_VPERMVARDF256_MASK,
29976 IX86_BUILTIN_VPERMDF256_MASK,
29977 IX86_BUILTIN_PABSB256_MASK,
29978 IX86_BUILTIN_PABSB128_MASK,
29979 IX86_BUILTIN_PABSW256_MASK,
29980 IX86_BUILTIN_PABSW128_MASK,
29981 IX86_BUILTIN_VPERMILVARPD_MASK,
29982 IX86_BUILTIN_VPERMILVARPS_MASK,
29983 IX86_BUILTIN_VPERMILVARPD256_MASK,
29984 IX86_BUILTIN_VPERMILVARPS256_MASK,
29985 IX86_BUILTIN_VPERMILPD_MASK,
29986 IX86_BUILTIN_VPERMILPS_MASK,
29987 IX86_BUILTIN_VPERMILPD256_MASK,
29988 IX86_BUILTIN_VPERMILPS256_MASK,
29989 IX86_BUILTIN_BLENDMQ256,
29990 IX86_BUILTIN_BLENDMD256,
29991 IX86_BUILTIN_BLENDMPD256,
29992 IX86_BUILTIN_BLENDMPS256,
29993 IX86_BUILTIN_BLENDMQ128,
29994 IX86_BUILTIN_BLENDMD128,
29995 IX86_BUILTIN_BLENDMPD128,
29996 IX86_BUILTIN_BLENDMPS128,
29997 IX86_BUILTIN_BLENDMW256,
29998 IX86_BUILTIN_BLENDMB256,
29999 IX86_BUILTIN_BLENDMW128,
30000 IX86_BUILTIN_BLENDMB128,
30001 IX86_BUILTIN_PMULLD256_MASK,
30002 IX86_BUILTIN_PMULLD128_MASK,
30003 IX86_BUILTIN_PMULUDQ256_MASK,
30004 IX86_BUILTIN_PMULDQ256_MASK,
30005 IX86_BUILTIN_PMULDQ128_MASK,
30006 IX86_BUILTIN_PMULUDQ128_MASK,
30007 IX86_BUILTIN_CVTPD2PS256_MASK,
30008 IX86_BUILTIN_CVTPD2PS_MASK,
30009 IX86_BUILTIN_VPERMVARSI256_MASK,
30010 IX86_BUILTIN_VPERMVARDI256_MASK,
30011 IX86_BUILTIN_VPERMDI256_MASK,
30012 IX86_BUILTIN_CMPQ256,
30013 IX86_BUILTIN_CMPD256,
30014 IX86_BUILTIN_UCMPQ256,
30015 IX86_BUILTIN_UCMPD256,
30016 IX86_BUILTIN_CMPB256,
30017 IX86_BUILTIN_CMPW256,
30018 IX86_BUILTIN_UCMPB256,
30019 IX86_BUILTIN_UCMPW256,
30020 IX86_BUILTIN_CMPPD256_MASK,
30021 IX86_BUILTIN_CMPPS256_MASK,
30022 IX86_BUILTIN_CMPQ128,
30023 IX86_BUILTIN_CMPD128,
30024 IX86_BUILTIN_UCMPQ128,
30025 IX86_BUILTIN_UCMPD128,
30026 IX86_BUILTIN_CMPB128,
30027 IX86_BUILTIN_CMPW128,
30028 IX86_BUILTIN_UCMPB128,
30029 IX86_BUILTIN_UCMPW128,
30030 IX86_BUILTIN_CMPPD128_MASK,
30031 IX86_BUILTIN_CMPPS128_MASK,
30033 IX86_BUILTIN_GATHER3SIV8SF,
30034 IX86_BUILTIN_GATHER3SIV4SF,
30035 IX86_BUILTIN_GATHER3SIV4DF,
30036 IX86_BUILTIN_GATHER3SIV2DF,
30037 IX86_BUILTIN_GATHER3DIV8SF,
30038 IX86_BUILTIN_GATHER3DIV4SF,
30039 IX86_BUILTIN_GATHER3DIV4DF,
30040 IX86_BUILTIN_GATHER3DIV2DF,
30041 IX86_BUILTIN_GATHER3SIV8SI,
30042 IX86_BUILTIN_GATHER3SIV4SI,
30043 IX86_BUILTIN_GATHER3SIV4DI,
30044 IX86_BUILTIN_GATHER3SIV2DI,
30045 IX86_BUILTIN_GATHER3DIV8SI,
30046 IX86_BUILTIN_GATHER3DIV4SI,
30047 IX86_BUILTIN_GATHER3DIV4DI,
30048 IX86_BUILTIN_GATHER3DIV2DI,
30049 IX86_BUILTIN_SCATTERSIV8SF,
30050 IX86_BUILTIN_SCATTERSIV4SF,
30051 IX86_BUILTIN_SCATTERSIV4DF,
30052 IX86_BUILTIN_SCATTERSIV2DF,
30053 IX86_BUILTIN_SCATTERDIV8SF,
30054 IX86_BUILTIN_SCATTERDIV4SF,
30055 IX86_BUILTIN_SCATTERDIV4DF,
30056 IX86_BUILTIN_SCATTERDIV2DF,
30057 IX86_BUILTIN_SCATTERSIV8SI,
30058 IX86_BUILTIN_SCATTERSIV4SI,
30059 IX86_BUILTIN_SCATTERSIV4DI,
30060 IX86_BUILTIN_SCATTERSIV2DI,
30061 IX86_BUILTIN_SCATTERDIV8SI,
30062 IX86_BUILTIN_SCATTERDIV4SI,
30063 IX86_BUILTIN_SCATTERDIV4DI,
30064 IX86_BUILTIN_SCATTERDIV2DI,
30066 /* AVX512DQ. */
30067 IX86_BUILTIN_RANGESD128,
30068 IX86_BUILTIN_RANGESS128,
30069 IX86_BUILTIN_KUNPCKWD,
30070 IX86_BUILTIN_KUNPCKDQ,
30071 IX86_BUILTIN_BROADCASTF32x2_512,
30072 IX86_BUILTIN_BROADCASTI32x2_512,
30073 IX86_BUILTIN_BROADCASTF64X2_512,
30074 IX86_BUILTIN_BROADCASTI64X2_512,
30075 IX86_BUILTIN_BROADCASTF32X8_512,
30076 IX86_BUILTIN_BROADCASTI32X8_512,
30077 IX86_BUILTIN_EXTRACTF64X2_512,
30078 IX86_BUILTIN_EXTRACTF32X8,
30079 IX86_BUILTIN_EXTRACTI64X2_512,
30080 IX86_BUILTIN_EXTRACTI32X8,
30081 IX86_BUILTIN_REDUCEPD512_MASK,
30082 IX86_BUILTIN_REDUCEPS512_MASK,
30083 IX86_BUILTIN_PMULLQ512,
30084 IX86_BUILTIN_XORPD512,
30085 IX86_BUILTIN_XORPS512,
30086 IX86_BUILTIN_ORPD512,
30087 IX86_BUILTIN_ORPS512,
30088 IX86_BUILTIN_ANDPD512,
30089 IX86_BUILTIN_ANDPS512,
30090 IX86_BUILTIN_ANDNPD512,
30091 IX86_BUILTIN_ANDNPS512,
30092 IX86_BUILTIN_INSERTF32X8,
30093 IX86_BUILTIN_INSERTI32X8,
30094 IX86_BUILTIN_INSERTF64X2_512,
30095 IX86_BUILTIN_INSERTI64X2_512,
30096 IX86_BUILTIN_FPCLASSPD512,
30097 IX86_BUILTIN_FPCLASSPS512,
30098 IX86_BUILTIN_CVTD2MASK512,
30099 IX86_BUILTIN_CVTQ2MASK512,
30100 IX86_BUILTIN_CVTMASK2D512,
30101 IX86_BUILTIN_CVTMASK2Q512,
30102 IX86_BUILTIN_CVTPD2QQ512,
30103 IX86_BUILTIN_CVTPS2QQ512,
30104 IX86_BUILTIN_CVTPD2UQQ512,
30105 IX86_BUILTIN_CVTPS2UQQ512,
30106 IX86_BUILTIN_CVTQQ2PS512,
30107 IX86_BUILTIN_CVTUQQ2PS512,
30108 IX86_BUILTIN_CVTQQ2PD512,
30109 IX86_BUILTIN_CVTUQQ2PD512,
30110 IX86_BUILTIN_CVTTPS2QQ512,
30111 IX86_BUILTIN_CVTTPS2UQQ512,
30112 IX86_BUILTIN_CVTTPD2QQ512,
30113 IX86_BUILTIN_CVTTPD2UQQ512,
30114 IX86_BUILTIN_RANGEPS512,
30115 IX86_BUILTIN_RANGEPD512,
30117 /* AVX512BW. */
30118 IX86_BUILTIN_PACKUSDW512,
30119 IX86_BUILTIN_PACKSSDW512,
30120 IX86_BUILTIN_LOADDQUHI512_MASK,
30121 IX86_BUILTIN_LOADDQUQI512_MASK,
30122 IX86_BUILTIN_PSLLDQ512,
30123 IX86_BUILTIN_PSRLDQ512,
30124 IX86_BUILTIN_STOREDQUHI512_MASK,
30125 IX86_BUILTIN_STOREDQUQI512_MASK,
30126 IX86_BUILTIN_PALIGNR512,
30127 IX86_BUILTIN_PALIGNR512_MASK,
30128 IX86_BUILTIN_MOVDQUHI512_MASK,
30129 IX86_BUILTIN_MOVDQUQI512_MASK,
30130 IX86_BUILTIN_PSADBW512,
30131 IX86_BUILTIN_DBPSADBW512,
30132 IX86_BUILTIN_PBROADCASTB512,
30133 IX86_BUILTIN_PBROADCASTB512_GPR,
30134 IX86_BUILTIN_PBROADCASTW512,
30135 IX86_BUILTIN_PBROADCASTW512_GPR,
30136 IX86_BUILTIN_PMOVSXBW512_MASK,
30137 IX86_BUILTIN_PMOVZXBW512_MASK,
30138 IX86_BUILTIN_VPERMVARHI512_MASK,
30139 IX86_BUILTIN_VPERMT2VARHI512,
30140 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30141 IX86_BUILTIN_VPERMI2VARHI512,
30142 IX86_BUILTIN_PAVGB512,
30143 IX86_BUILTIN_PAVGW512,
30144 IX86_BUILTIN_PADDB512,
30145 IX86_BUILTIN_PSUBB512,
30146 IX86_BUILTIN_PSUBSB512,
30147 IX86_BUILTIN_PADDSB512,
30148 IX86_BUILTIN_PSUBUSB512,
30149 IX86_BUILTIN_PADDUSB512,
30150 IX86_BUILTIN_PSUBW512,
30151 IX86_BUILTIN_PADDW512,
30152 IX86_BUILTIN_PSUBSW512,
30153 IX86_BUILTIN_PADDSW512,
30154 IX86_BUILTIN_PSUBUSW512,
30155 IX86_BUILTIN_PADDUSW512,
30156 IX86_BUILTIN_PMAXUW512,
30157 IX86_BUILTIN_PMAXSW512,
30158 IX86_BUILTIN_PMINUW512,
30159 IX86_BUILTIN_PMINSW512,
30160 IX86_BUILTIN_PMAXUB512,
30161 IX86_BUILTIN_PMAXSB512,
30162 IX86_BUILTIN_PMINUB512,
30163 IX86_BUILTIN_PMINSB512,
30164 IX86_BUILTIN_PMOVWB512,
30165 IX86_BUILTIN_PMOVSWB512,
30166 IX86_BUILTIN_PMOVUSWB512,
30167 IX86_BUILTIN_PMULHRSW512_MASK,
30168 IX86_BUILTIN_PMULHUW512_MASK,
30169 IX86_BUILTIN_PMULHW512_MASK,
30170 IX86_BUILTIN_PMULLW512_MASK,
30171 IX86_BUILTIN_PSLLWI512_MASK,
30172 IX86_BUILTIN_PSLLW512_MASK,
30173 IX86_BUILTIN_PACKSSWB512,
30174 IX86_BUILTIN_PACKUSWB512,
30175 IX86_BUILTIN_PSRAVV32HI,
30176 IX86_BUILTIN_PMADDUBSW512_MASK,
30177 IX86_BUILTIN_PMADDWD512_MASK,
30178 IX86_BUILTIN_PSRLVV32HI,
30179 IX86_BUILTIN_PUNPCKHBW512,
30180 IX86_BUILTIN_PUNPCKHWD512,
30181 IX86_BUILTIN_PUNPCKLBW512,
30182 IX86_BUILTIN_PUNPCKLWD512,
30183 IX86_BUILTIN_PSHUFB512,
30184 IX86_BUILTIN_PSHUFHW512,
30185 IX86_BUILTIN_PSHUFLW512,
30186 IX86_BUILTIN_PSRAWI512,
30187 IX86_BUILTIN_PSRAW512,
30188 IX86_BUILTIN_PSRLWI512,
30189 IX86_BUILTIN_PSRLW512,
30190 IX86_BUILTIN_CVTB2MASK512,
30191 IX86_BUILTIN_CVTW2MASK512,
30192 IX86_BUILTIN_CVTMASK2B512,
30193 IX86_BUILTIN_CVTMASK2W512,
30194 IX86_BUILTIN_PCMPEQB512_MASK,
30195 IX86_BUILTIN_PCMPEQW512_MASK,
30196 IX86_BUILTIN_PCMPGTB512_MASK,
30197 IX86_BUILTIN_PCMPGTW512_MASK,
30198 IX86_BUILTIN_PTESTMB512,
30199 IX86_BUILTIN_PTESTMW512,
30200 IX86_BUILTIN_PTESTNMB512,
30201 IX86_BUILTIN_PTESTNMW512,
30202 IX86_BUILTIN_PSLLVV32HI,
30203 IX86_BUILTIN_PABSB512,
30204 IX86_BUILTIN_PABSW512,
30205 IX86_BUILTIN_BLENDMW512,
30206 IX86_BUILTIN_BLENDMB512,
30207 IX86_BUILTIN_CMPB512,
30208 IX86_BUILTIN_CMPW512,
30209 IX86_BUILTIN_UCMPB512,
30210 IX86_BUILTIN_UCMPW512,
30212 /* Alternate 4 and 8 element gather/scatter for the vectorizer
30213 where all operands are 32-byte or 64-byte wide respectively. */
30214 IX86_BUILTIN_GATHERALTSIV4DF,
30215 IX86_BUILTIN_GATHERALTDIV8SF,
30216 IX86_BUILTIN_GATHERALTSIV4DI,
30217 IX86_BUILTIN_GATHERALTDIV8SI,
30218 IX86_BUILTIN_GATHER3ALTDIV16SF,
30219 IX86_BUILTIN_GATHER3ALTDIV16SI,
30220 IX86_BUILTIN_GATHER3ALTSIV4DF,
30221 IX86_BUILTIN_GATHER3ALTDIV8SF,
30222 IX86_BUILTIN_GATHER3ALTSIV4DI,
30223 IX86_BUILTIN_GATHER3ALTDIV8SI,
30224 IX86_BUILTIN_GATHER3ALTSIV8DF,
30225 IX86_BUILTIN_GATHER3ALTSIV8DI,
30226 IX86_BUILTIN_GATHER3DIV16SF,
30227 IX86_BUILTIN_GATHER3DIV16SI,
30228 IX86_BUILTIN_GATHER3DIV8DF,
30229 IX86_BUILTIN_GATHER3DIV8DI,
30230 IX86_BUILTIN_GATHER3SIV16SF,
30231 IX86_BUILTIN_GATHER3SIV16SI,
30232 IX86_BUILTIN_GATHER3SIV8DF,
30233 IX86_BUILTIN_GATHER3SIV8DI,
30234 IX86_BUILTIN_SCATTERDIV16SF,
30235 IX86_BUILTIN_SCATTERDIV16SI,
30236 IX86_BUILTIN_SCATTERDIV8DF,
30237 IX86_BUILTIN_SCATTERDIV8DI,
30238 IX86_BUILTIN_SCATTERSIV16SF,
30239 IX86_BUILTIN_SCATTERSIV16SI,
30240 IX86_BUILTIN_SCATTERSIV8DF,
30241 IX86_BUILTIN_SCATTERSIV8DI,
30243 /* AVX512PF */
30244 IX86_BUILTIN_GATHERPFQPD,
30245 IX86_BUILTIN_GATHERPFDPS,
30246 IX86_BUILTIN_GATHERPFDPD,
30247 IX86_BUILTIN_GATHERPFQPS,
30248 IX86_BUILTIN_SCATTERPFDPD,
30249 IX86_BUILTIN_SCATTERPFDPS,
30250 IX86_BUILTIN_SCATTERPFQPD,
30251 IX86_BUILTIN_SCATTERPFQPS,
30253 /* AVX-512ER */
30254 IX86_BUILTIN_EXP2PD_MASK,
30255 IX86_BUILTIN_EXP2PS_MASK,
30256 IX86_BUILTIN_EXP2PS,
30257 IX86_BUILTIN_RCP28PD,
30258 IX86_BUILTIN_RCP28PS,
30259 IX86_BUILTIN_RCP28SD,
30260 IX86_BUILTIN_RCP28SS,
30261 IX86_BUILTIN_RSQRT28PD,
30262 IX86_BUILTIN_RSQRT28PS,
30263 IX86_BUILTIN_RSQRT28SD,
30264 IX86_BUILTIN_RSQRT28SS,
30266 /* AVX-512IFMA */
30267 IX86_BUILTIN_VPMADD52LUQ512,
30268 IX86_BUILTIN_VPMADD52HUQ512,
30269 IX86_BUILTIN_VPMADD52LUQ256,
30270 IX86_BUILTIN_VPMADD52HUQ256,
30271 IX86_BUILTIN_VPMADD52LUQ128,
30272 IX86_BUILTIN_VPMADD52HUQ128,
30273 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30274 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30275 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30276 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30277 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30278 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30280 /* AVX-512VBMI */
30281 IX86_BUILTIN_VPMULTISHIFTQB512,
30282 IX86_BUILTIN_VPMULTISHIFTQB256,
30283 IX86_BUILTIN_VPMULTISHIFTQB128,
30284 IX86_BUILTIN_VPERMVARQI512_MASK,
30285 IX86_BUILTIN_VPERMT2VARQI512,
30286 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30287 IX86_BUILTIN_VPERMI2VARQI512,
30288 IX86_BUILTIN_VPERMVARQI256_MASK,
30289 IX86_BUILTIN_VPERMVARQI128_MASK,
30290 IX86_BUILTIN_VPERMT2VARQI256,
30291 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30292 IX86_BUILTIN_VPERMT2VARQI128,
30293 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30294 IX86_BUILTIN_VPERMI2VARQI256,
30295 IX86_BUILTIN_VPERMI2VARQI128,
30297 /* SHA builtins. */
30298 IX86_BUILTIN_SHA1MSG1,
30299 IX86_BUILTIN_SHA1MSG2,
30300 IX86_BUILTIN_SHA1NEXTE,
30301 IX86_BUILTIN_SHA1RNDS4,
30302 IX86_BUILTIN_SHA256MSG1,
30303 IX86_BUILTIN_SHA256MSG2,
30304 IX86_BUILTIN_SHA256RNDS2,
30306 /* CLWB instructions. */
30307 IX86_BUILTIN_CLWB,
30309 /* PCOMMIT instructions. */
30310 IX86_BUILTIN_PCOMMIT,
30312 /* CLFLUSHOPT instructions. */
30313 IX86_BUILTIN_CLFLUSHOPT,
30315 /* TFmode support builtins. */
30316 IX86_BUILTIN_INFQ,
30317 IX86_BUILTIN_HUGE_VALQ,
30318 IX86_BUILTIN_FABSQ,
30319 IX86_BUILTIN_COPYSIGNQ,
30321 /* Vectorizer support builtins. */
30322 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30323 IX86_BUILTIN_CPYSGNPS,
30324 IX86_BUILTIN_CPYSGNPD,
30325 IX86_BUILTIN_CPYSGNPS256,
30326 IX86_BUILTIN_CPYSGNPS512,
30327 IX86_BUILTIN_CPYSGNPD256,
30328 IX86_BUILTIN_CPYSGNPD512,
30329 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30330 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30333 /* FMA4 instructions. */
30334 IX86_BUILTIN_VFMADDSS,
30335 IX86_BUILTIN_VFMADDSD,
30336 IX86_BUILTIN_VFMADDPS,
30337 IX86_BUILTIN_VFMADDPD,
30338 IX86_BUILTIN_VFMADDPS256,
30339 IX86_BUILTIN_VFMADDPD256,
30340 IX86_BUILTIN_VFMADDSUBPS,
30341 IX86_BUILTIN_VFMADDSUBPD,
30342 IX86_BUILTIN_VFMADDSUBPS256,
30343 IX86_BUILTIN_VFMADDSUBPD256,
30345 /* FMA3 instructions. */
30346 IX86_BUILTIN_VFMADDSS3,
30347 IX86_BUILTIN_VFMADDSD3,
30349 /* XOP instructions. */
30350 IX86_BUILTIN_VPCMOV,
30351 IX86_BUILTIN_VPCMOV_V2DI,
30352 IX86_BUILTIN_VPCMOV_V4SI,
30353 IX86_BUILTIN_VPCMOV_V8HI,
30354 IX86_BUILTIN_VPCMOV_V16QI,
30355 IX86_BUILTIN_VPCMOV_V4SF,
30356 IX86_BUILTIN_VPCMOV_V2DF,
30357 IX86_BUILTIN_VPCMOV256,
30358 IX86_BUILTIN_VPCMOV_V4DI256,
30359 IX86_BUILTIN_VPCMOV_V8SI256,
30360 IX86_BUILTIN_VPCMOV_V16HI256,
30361 IX86_BUILTIN_VPCMOV_V32QI256,
30362 IX86_BUILTIN_VPCMOV_V8SF256,
30363 IX86_BUILTIN_VPCMOV_V4DF256,
30365 IX86_BUILTIN_VPPERM,
30367 IX86_BUILTIN_VPMACSSWW,
30368 IX86_BUILTIN_VPMACSWW,
30369 IX86_BUILTIN_VPMACSSWD,
30370 IX86_BUILTIN_VPMACSWD,
30371 IX86_BUILTIN_VPMACSSDD,
30372 IX86_BUILTIN_VPMACSDD,
30373 IX86_BUILTIN_VPMACSSDQL,
30374 IX86_BUILTIN_VPMACSSDQH,
30375 IX86_BUILTIN_VPMACSDQL,
30376 IX86_BUILTIN_VPMACSDQH,
30377 IX86_BUILTIN_VPMADCSSWD,
30378 IX86_BUILTIN_VPMADCSWD,
30380 IX86_BUILTIN_VPHADDBW,
30381 IX86_BUILTIN_VPHADDBD,
30382 IX86_BUILTIN_VPHADDBQ,
30383 IX86_BUILTIN_VPHADDWD,
30384 IX86_BUILTIN_VPHADDWQ,
30385 IX86_BUILTIN_VPHADDDQ,
30386 IX86_BUILTIN_VPHADDUBW,
30387 IX86_BUILTIN_VPHADDUBD,
30388 IX86_BUILTIN_VPHADDUBQ,
30389 IX86_BUILTIN_VPHADDUWD,
30390 IX86_BUILTIN_VPHADDUWQ,
30391 IX86_BUILTIN_VPHADDUDQ,
30392 IX86_BUILTIN_VPHSUBBW,
30393 IX86_BUILTIN_VPHSUBWD,
30394 IX86_BUILTIN_VPHSUBDQ,
30396 IX86_BUILTIN_VPROTB,
30397 IX86_BUILTIN_VPROTW,
30398 IX86_BUILTIN_VPROTD,
30399 IX86_BUILTIN_VPROTQ,
30400 IX86_BUILTIN_VPROTB_IMM,
30401 IX86_BUILTIN_VPROTW_IMM,
30402 IX86_BUILTIN_VPROTD_IMM,
30403 IX86_BUILTIN_VPROTQ_IMM,
30405 IX86_BUILTIN_VPSHLB,
30406 IX86_BUILTIN_VPSHLW,
30407 IX86_BUILTIN_VPSHLD,
30408 IX86_BUILTIN_VPSHLQ,
30409 IX86_BUILTIN_VPSHAB,
30410 IX86_BUILTIN_VPSHAW,
30411 IX86_BUILTIN_VPSHAD,
30412 IX86_BUILTIN_VPSHAQ,
30414 IX86_BUILTIN_VFRCZSS,
30415 IX86_BUILTIN_VFRCZSD,
30416 IX86_BUILTIN_VFRCZPS,
30417 IX86_BUILTIN_VFRCZPD,
30418 IX86_BUILTIN_VFRCZPS256,
30419 IX86_BUILTIN_VFRCZPD256,
30421 IX86_BUILTIN_VPCOMEQUB,
30422 IX86_BUILTIN_VPCOMNEUB,
30423 IX86_BUILTIN_VPCOMLTUB,
30424 IX86_BUILTIN_VPCOMLEUB,
30425 IX86_BUILTIN_VPCOMGTUB,
30426 IX86_BUILTIN_VPCOMGEUB,
30427 IX86_BUILTIN_VPCOMFALSEUB,
30428 IX86_BUILTIN_VPCOMTRUEUB,
30430 IX86_BUILTIN_VPCOMEQUW,
30431 IX86_BUILTIN_VPCOMNEUW,
30432 IX86_BUILTIN_VPCOMLTUW,
30433 IX86_BUILTIN_VPCOMLEUW,
30434 IX86_BUILTIN_VPCOMGTUW,
30435 IX86_BUILTIN_VPCOMGEUW,
30436 IX86_BUILTIN_VPCOMFALSEUW,
30437 IX86_BUILTIN_VPCOMTRUEUW,
30439 IX86_BUILTIN_VPCOMEQUD,
30440 IX86_BUILTIN_VPCOMNEUD,
30441 IX86_BUILTIN_VPCOMLTUD,
30442 IX86_BUILTIN_VPCOMLEUD,
30443 IX86_BUILTIN_VPCOMGTUD,
30444 IX86_BUILTIN_VPCOMGEUD,
30445 IX86_BUILTIN_VPCOMFALSEUD,
30446 IX86_BUILTIN_VPCOMTRUEUD,
30448 IX86_BUILTIN_VPCOMEQUQ,
30449 IX86_BUILTIN_VPCOMNEUQ,
30450 IX86_BUILTIN_VPCOMLTUQ,
30451 IX86_BUILTIN_VPCOMLEUQ,
30452 IX86_BUILTIN_VPCOMGTUQ,
30453 IX86_BUILTIN_VPCOMGEUQ,
30454 IX86_BUILTIN_VPCOMFALSEUQ,
30455 IX86_BUILTIN_VPCOMTRUEUQ,
30457 IX86_BUILTIN_VPCOMEQB,
30458 IX86_BUILTIN_VPCOMNEB,
30459 IX86_BUILTIN_VPCOMLTB,
30460 IX86_BUILTIN_VPCOMLEB,
30461 IX86_BUILTIN_VPCOMGTB,
30462 IX86_BUILTIN_VPCOMGEB,
30463 IX86_BUILTIN_VPCOMFALSEB,
30464 IX86_BUILTIN_VPCOMTRUEB,
30466 IX86_BUILTIN_VPCOMEQW,
30467 IX86_BUILTIN_VPCOMNEW,
30468 IX86_BUILTIN_VPCOMLTW,
30469 IX86_BUILTIN_VPCOMLEW,
30470 IX86_BUILTIN_VPCOMGTW,
30471 IX86_BUILTIN_VPCOMGEW,
30472 IX86_BUILTIN_VPCOMFALSEW,
30473 IX86_BUILTIN_VPCOMTRUEW,
30475 IX86_BUILTIN_VPCOMEQD,
30476 IX86_BUILTIN_VPCOMNED,
30477 IX86_BUILTIN_VPCOMLTD,
30478 IX86_BUILTIN_VPCOMLED,
30479 IX86_BUILTIN_VPCOMGTD,
30480 IX86_BUILTIN_VPCOMGED,
30481 IX86_BUILTIN_VPCOMFALSED,
30482 IX86_BUILTIN_VPCOMTRUED,
30484 IX86_BUILTIN_VPCOMEQQ,
30485 IX86_BUILTIN_VPCOMNEQ,
30486 IX86_BUILTIN_VPCOMLTQ,
30487 IX86_BUILTIN_VPCOMLEQ,
30488 IX86_BUILTIN_VPCOMGTQ,
30489 IX86_BUILTIN_VPCOMGEQ,
30490 IX86_BUILTIN_VPCOMFALSEQ,
30491 IX86_BUILTIN_VPCOMTRUEQ,
30493 /* LWP instructions. */
30494 IX86_BUILTIN_LLWPCB,
30495 IX86_BUILTIN_SLWPCB,
30496 IX86_BUILTIN_LWPVAL32,
30497 IX86_BUILTIN_LWPVAL64,
30498 IX86_BUILTIN_LWPINS32,
30499 IX86_BUILTIN_LWPINS64,
30501 IX86_BUILTIN_CLZS,
30503 /* RTM */
30504 IX86_BUILTIN_XBEGIN,
30505 IX86_BUILTIN_XEND,
30506 IX86_BUILTIN_XABORT,
30507 IX86_BUILTIN_XTEST,
30509 /* MPX */
30510 IX86_BUILTIN_BNDMK,
30511 IX86_BUILTIN_BNDSTX,
30512 IX86_BUILTIN_BNDLDX,
30513 IX86_BUILTIN_BNDCL,
30514 IX86_BUILTIN_BNDCU,
30515 IX86_BUILTIN_BNDRET,
30516 IX86_BUILTIN_BNDNARROW,
30517 IX86_BUILTIN_BNDINT,
30518 IX86_BUILTIN_SIZEOF,
30519 IX86_BUILTIN_BNDLOWER,
30520 IX86_BUILTIN_BNDUPPER,
30522 /* BMI instructions. */
30523 IX86_BUILTIN_BEXTR32,
30524 IX86_BUILTIN_BEXTR64,
30525 IX86_BUILTIN_CTZS,
30527 /* TBM instructions. */
30528 IX86_BUILTIN_BEXTRI32,
30529 IX86_BUILTIN_BEXTRI64,
30531 /* BMI2 instructions. */
30532 IX86_BUILTIN_BZHI32,
30533 IX86_BUILTIN_BZHI64,
30534 IX86_BUILTIN_PDEP32,
30535 IX86_BUILTIN_PDEP64,
30536 IX86_BUILTIN_PEXT32,
30537 IX86_BUILTIN_PEXT64,
30539 /* ADX instructions. */
30540 IX86_BUILTIN_ADDCARRYX32,
30541 IX86_BUILTIN_ADDCARRYX64,
30543 /* SBB instructions. */
30544 IX86_BUILTIN_SBB32,
30545 IX86_BUILTIN_SBB64,
30547 /* FSGSBASE instructions. */
30548 IX86_BUILTIN_RDFSBASE32,
30549 IX86_BUILTIN_RDFSBASE64,
30550 IX86_BUILTIN_RDGSBASE32,
30551 IX86_BUILTIN_RDGSBASE64,
30552 IX86_BUILTIN_WRFSBASE32,
30553 IX86_BUILTIN_WRFSBASE64,
30554 IX86_BUILTIN_WRGSBASE32,
30555 IX86_BUILTIN_WRGSBASE64,
30557 /* RDRND instructions. */
30558 IX86_BUILTIN_RDRAND16_STEP,
30559 IX86_BUILTIN_RDRAND32_STEP,
30560 IX86_BUILTIN_RDRAND64_STEP,
30562 /* RDSEED instructions. */
30563 IX86_BUILTIN_RDSEED16_STEP,
30564 IX86_BUILTIN_RDSEED32_STEP,
30565 IX86_BUILTIN_RDSEED64_STEP,
30567 /* F16C instructions. */
30568 IX86_BUILTIN_CVTPH2PS,
30569 IX86_BUILTIN_CVTPH2PS256,
30570 IX86_BUILTIN_CVTPS2PH,
30571 IX86_BUILTIN_CVTPS2PH256,
30573 /* CFString built-in for darwin */
30574 IX86_BUILTIN_CFSTRING,
30576 /* Builtins to get CPU type and supported features. */
30577 IX86_BUILTIN_CPU_INIT,
30578 IX86_BUILTIN_CPU_IS,
30579 IX86_BUILTIN_CPU_SUPPORTS,
30581 /* Read/write FLAGS register built-ins. */
30582 IX86_BUILTIN_READ_FLAGS,
30583 IX86_BUILTIN_WRITE_FLAGS,
30585 IX86_BUILTIN_MAX
30588 /* Table for the ix86 builtin decls. */
30589 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30591 /* Table of all of the builtin functions that are possible with different ISA's
30592 but are waiting to be built until a function is declared to use that
30593 ISA. */
30594 struct builtin_isa {
30595 const char *name; /* function name */
30596 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30597 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30598 bool const_p; /* true if the declaration is constant */
30599 bool leaf_p; /* true if the declaration has leaf attribute */
30600 bool nothrow_p; /* true if the declaration has nothrow attribute */
30601 bool set_and_not_built_p;
30604 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30606 /* Bits that can still enable any inclusion of a builtin. */
30607 static HOST_WIDE_INT deferred_isa_values = 0;
30609 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
30610 of which isa_flags to use in the ix86_builtins_isa array. Stores the
30611 function decl in the ix86_builtins array. Returns the function decl or
30612 NULL_TREE, if the builtin was not added.
30614 If the front end has a special hook for builtin functions, delay adding
30615 builtin functions that aren't in the current ISA until the ISA is changed
30616 with function specific optimization. Doing so, can save about 300K for the
30617 default compiler. When the builtin is expanded, check at that time whether
30618 it is valid.
30620 If the front end doesn't have a special hook, record all builtins, even if
30621 it isn't an instruction set in the current ISA in case the user uses
30622 function specific options for a different ISA, so that we don't get scope
30623 errors if a builtin is added in the middle of a function scope. */
30625 static inline tree
30626 def_builtin (HOST_WIDE_INT mask, const char *name,
30627 enum ix86_builtin_func_type tcode,
30628 enum ix86_builtins code)
30630 tree decl = NULL_TREE;
30632 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30634 ix86_builtins_isa[(int) code].isa = mask;
30636 mask &= ~OPTION_MASK_ISA_64BIT;
30637 if (mask == 0
30638 || (mask & ix86_isa_flags) != 0
30639 || (lang_hooks.builtin_function
30640 == lang_hooks.builtin_function_ext_scope))
30643 tree type = ix86_get_builtin_func_type (tcode);
30644 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30645 NULL, NULL_TREE);
30646 ix86_builtins[(int) code] = decl;
30647 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30649 else
30651 /* Just a MASK where set_and_not_built_p == true can potentially
30652 include a builtin. */
30653 deferred_isa_values |= mask;
30654 ix86_builtins[(int) code] = NULL_TREE;
30655 ix86_builtins_isa[(int) code].tcode = tcode;
30656 ix86_builtins_isa[(int) code].name = name;
30657 ix86_builtins_isa[(int) code].leaf_p = false;
30658 ix86_builtins_isa[(int) code].nothrow_p = false;
30659 ix86_builtins_isa[(int) code].const_p = false;
30660 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30664 return decl;
30667 /* Like def_builtin, but also marks the function decl "const". */
30669 static inline tree
30670 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30671 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30673 tree decl = def_builtin (mask, name, tcode, code);
30674 if (decl)
30675 TREE_READONLY (decl) = 1;
30676 else
30677 ix86_builtins_isa[(int) code].const_p = true;
30679 return decl;
30682 /* Add any new builtin functions for a given ISA that may not have been
30683 declared. This saves a bit of space compared to adding all of the
30684 declarations to the tree, even if we didn't use them. */
30686 static void
30687 ix86_add_new_builtins (HOST_WIDE_INT isa)
30689 if ((isa & deferred_isa_values) == 0)
30690 return;
30692 /* Bits in ISA value can be removed from potential isa values. */
30693 deferred_isa_values &= ~isa;
30695 int i;
30696 tree saved_current_target_pragma = current_target_pragma;
30697 current_target_pragma = NULL_TREE;
30699 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30701 if ((ix86_builtins_isa[i].isa & isa) != 0
30702 && ix86_builtins_isa[i].set_and_not_built_p)
30704 tree decl, type;
30706 /* Don't define the builtin again. */
30707 ix86_builtins_isa[i].set_and_not_built_p = false;
30709 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30710 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30711 type, i, BUILT_IN_MD, NULL,
30712 NULL_TREE);
30714 ix86_builtins[i] = decl;
30715 if (ix86_builtins_isa[i].const_p)
30716 TREE_READONLY (decl) = 1;
30717 if (ix86_builtins_isa[i].leaf_p)
30718 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30719 NULL_TREE);
30720 if (ix86_builtins_isa[i].nothrow_p)
30721 TREE_NOTHROW (decl) = 1;
30725 current_target_pragma = saved_current_target_pragma;
30728 /* Bits for builtin_description.flag. */
30730 /* Set when we don't support the comparison natively, and should
30731 swap_comparison in order to support it. */
30732 #define BUILTIN_DESC_SWAP_OPERANDS 1
30734 struct builtin_description
30736 const HOST_WIDE_INT mask;
30737 const enum insn_code icode;
30738 const char *const name;
30739 const enum ix86_builtins code;
30740 const enum rtx_code comparison;
30741 const int flag;
30744 static const struct builtin_description bdesc_comi[] =
30746 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30747 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30748 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30749 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30750 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30751 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30752 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30753 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30754 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30755 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30756 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30757 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30758 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30759 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30760 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30761 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30762 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30763 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30764 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30765 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30766 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30767 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30768 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30769 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
30772 static const struct builtin_description bdesc_pcmpestr[] =
30774 /* SSE4.2 */
30775 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30776 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30777 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30778 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30779 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30780 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30781 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30784 static const struct builtin_description bdesc_pcmpistr[] =
30786 /* SSE4.2 */
30787 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30788 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30789 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30790 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30791 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30792 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30793 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30796 /* Special builtins with variable number of arguments. */
30797 static const struct builtin_description bdesc_special_args[] =
30799 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30800 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30801 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30803 /* 80387 (for use internally for atomic compound assignment). */
30804 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30805 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30806 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30807 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30809 /* MMX */
30810 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30812 /* 3DNow! */
30813 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30815 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30816 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30817 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30818 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30819 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30820 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30821 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30822 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30823 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30825 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30826 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30827 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30828 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30829 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30830 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30831 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30832 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30834 /* SSE */
30835 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30836 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30837 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30839 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30840 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30841 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30842 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30844 /* SSE or 3DNow!A */
30845 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30846 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30848 /* SSE2 */
30849 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30850 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30851 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30852 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30853 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30854 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30855 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30856 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30857 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30858 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30860 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30861 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30863 /* SSE3 */
30864 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30866 /* SSE4.1 */
30867 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30869 /* SSE4A */
30870 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30871 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30873 /* AVX */
30874 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30875 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30877 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30878 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30879 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30880 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30881 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30883 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30884 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30885 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30886 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30887 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30888 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30889 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30891 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30892 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30893 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30895 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
30896 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
30897 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
30898 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
30899 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
30900 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
30901 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
30902 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
30904 /* AVX2 */
30905 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
30906 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
30907 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
30908 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
30909 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
30910 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
30911 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
30912 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
30913 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
30915 /* AVX512F */
30916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
30937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
30938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
30939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
30940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30964 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
30965 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
30966 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
30967 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
30968 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
30969 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
30971 /* FSGSBASE */
30972 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30973 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30974 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30975 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30976 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30977 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30978 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30979 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30981 /* RTM */
30982 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30983 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
30984 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
30986 /* AVX512BW */
30987 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
30988 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
30989 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
30990 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
30992 /* AVX512VL */
30993 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
30994 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
30995 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
30996 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
30997 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30998 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30999 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31000 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31001 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31003 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31004 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31005 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31006 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31007 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31008 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31009 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31010 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31011 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31012 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31013 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31014 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31015 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31016 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31017 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31018 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31019 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31020 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31021 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31022 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31023 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31024 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31025 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31027 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31028 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31029 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
31030 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
31031 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
31032 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
31033 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31034 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31035 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31036 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31037 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31038 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31040 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31041 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31042 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31043 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31044 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31045 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31046 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31047 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31048 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31049 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31051 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31052 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31053 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31054 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31055 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31056 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31057 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31060 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31061 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31063 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31064 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31065 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31066 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31067 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31068 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31069 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31070 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31071 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31072 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31073 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31074 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31075 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31076 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31077 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31078 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31079 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31080 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31081 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31082 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31083 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31084 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31085 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31086 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31088 /* PCOMMIT. */
31089 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
31090 };
31092 /* Builtins with variable number of arguments. */
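/* Each initializer in bdesc_args below is a struct builtin_description.
   Its fields are, in order: the ISA option mask that must be enabled for
   the builtin to be usable, the insn code of the pattern that implements
   it, the "__builtin_ia32_*" name (0 when no name is registered from the
   entry), the IX86_BUILTIN_* enumerator, an rtx comparison code (UNKNOWN
   for non-comparison builtins), and the prototype enumerator cast to int.
   For example, the first entry binds "__builtin_ia32_bsrsi" to the bsr
   pattern with the INT_FTYPE_INT (int -> int) prototype.  */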
31093 static const struct builtin_description bdesc_args[] =
31094 {
31095 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
31096 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
31097 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
31098 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31099 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31100 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31101 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31103 /* MMX */
31104 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31105 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31106 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31107 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31108 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31109 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31111 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31112 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31113 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31114 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31115 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31116 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31117 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31118 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31120 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31121 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31123 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31124 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31125 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31126 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31128 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31129 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31130 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31131 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31132 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31133 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31135 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31136 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31137 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31138 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31139 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31140 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31142 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31143 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
31144 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31146 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
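/* In the prototype names below, the _COUNT suffix marks the last operand
   as a shift count: the "i" forms (e.g. __builtin_ia32_psllwi) take an
   integer count, while the non-"i" forms take the count in a vector
   register.  The expander handles that final argument specially rather
   than treating it as an ordinary vector operand.  */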
31148 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31149 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31150 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31151 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31152 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31153 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31155 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31156 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31157 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31158 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31159 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31160 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31162 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31163 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31164 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31165 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31167 /* 3DNow! */
31168 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31169 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31170 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31171 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31173 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31174 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31175 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31176 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31177 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31178 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31179 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31180 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31181 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31182 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31183 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31184 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31185 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31186 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31187 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31189 /* 3DNow!A */
31190 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31191 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31192 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31193 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31194 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31195 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31197 /* SSE */
31198 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31199 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31200 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31201 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31202 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31203 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31204 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31205 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31206 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31207 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31208 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31209 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31211 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31213 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31214 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31215 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31216 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31217 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31218 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31219 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31220 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
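/* The comparison builtins below all expand through the same mask-compare
   pattern; the rtx code in the fifth field selects the condition.  GT and
   GE are obtained by swapping the operands of LT and LE (the _SWAP suffix
   on the prototype), and the negated forms (cmpnlt, cmpnle, cmpngt,
   cmpnge) use the unordered codes UNGE and UNGT.  */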
31222 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31223 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31224 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31225 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31226 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31227 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31228 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31229 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31230 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31231 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31232 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31233 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31234 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31235 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31236 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31237 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31238 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31239 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31240 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31241 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31243 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31244 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31245 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31246 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31248 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31249 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31250 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31251 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31253 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31255 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31256 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31257 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31258 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31259 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31261 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31262 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31263 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
31265 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
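/* The _VEC_MERGE prototypes below describe the scalar forms (sqrtss,
   rsqrtss, rcpss): the operation is applied to element 0 only, and the
   remaining elements of the single source operand are passed through
   unchanged.  */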
31267 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31268 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31269 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31271 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31272 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
31274 /* SSE MMX or 3DNow!A */
31275 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31276 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31277 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31279 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31280 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31281 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31282 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31284 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31285 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31287 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31289 /* SSE2 */
31290 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31292 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31293 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31294 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31295 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31296 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31298 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31299 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31300 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31301 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31302 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31304 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31306 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31307 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31308 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31309 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31311 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31312 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31313 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31315 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31316 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31317 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31318 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31325 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31326 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31327 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31328 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31329 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31330 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31331 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31332 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31333 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31334 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31335 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31336 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31337 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31338 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31339 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31340 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31341 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31342 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31343 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31345 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31346 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31347 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31348 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31350 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31351 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31352 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31353 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31355 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31357 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31358 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31359 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31361 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31363 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31364 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31365 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31366 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31367 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31368 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31369 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31370 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31372 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31373 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31374 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31375 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31376 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31377 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31378 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31379 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31381 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31382 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31384 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31386 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31387 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31389 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31392 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31393 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31394 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31395 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31396 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31397 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31399 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31400 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31401 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31404 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31405 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31406 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31407 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31408 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31409 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31410 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31411 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31417 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31421 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31426 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
31430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31431 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31432 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31433 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31434 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31435 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31436 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31438 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31439 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31440 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31441 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31442 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31443 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31444 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31446 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31447 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31448 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31449 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31451 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31452 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31453 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31455 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31457 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31459 /* SSE2 MMX */
31460 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31461 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31463 /* SSE3 */
31464 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31465 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31467 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31468 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31469 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31470 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31471 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31472 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31474 /* SSSE3 */
31475 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31476 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31477 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31478 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31479 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31480 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31482 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31483 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31484 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31485 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31486 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31487 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31488 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31489 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31490 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31491 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31492 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31493 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31494 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31495 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31496 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31497 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31498 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31499 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31500 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31501 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31502 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31503 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31504 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31505 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31507 /* SSSE3. */
31508 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31509 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
31511 /* SSE4.1 */
31512 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31513 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31514 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31515 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31516 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31517 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31518 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31519 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31520 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31521 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31523 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31524 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31525 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31526 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31527 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31528 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31529 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31530 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31531 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31532 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31533 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31534 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31535 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31537 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31538 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31539 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31540 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31541 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31542 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31543 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31544 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31545 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31546 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31547 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31548 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31550 /* SSE4.1 */
31551 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31552 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31553 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31554 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
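  /* In the floor/ceil/trunc/rint rows below the rtx_code slot does not
     hold a comparison; as the (enum rtx_code) ROUND_* casts show, it
     carries the rounding-mode immediate used when the round pattern is
     expanded.  */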
31556 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31557 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31558 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31559 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31561 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31562 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31564 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31565 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31567 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31568 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31569 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31570 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31572 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31573 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31575 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31576 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
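  /* For the ptest rows the rtx_code slot picks the flag the expander
     tests: EQ for ptestz (ZF), LTU for ptestc (CF), GTU for ptestnzc
     (neither flag set).  */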
31578 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31579 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31580 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31582 /* SSE4.2 */
31583 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31584 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31585 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31586 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31587 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31589 /* SSE4A */
31590 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31591 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31592 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31593 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
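  /* The AES and PCLMUL rows below carry a null name, presumably because
     the builtin names are registered elsewhere in this file with their
     own ISA masks; only the expansion data recorded here is used.  */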
31595 /* AES */
31596 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31597 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31599 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31600 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31601 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31602 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31604 /* PCLMUL */
31605 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
31607 /* AVX */
31608 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31609 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31610 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31611 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31612 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31613 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31614 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31615 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31616 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31617 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31618 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31619 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31620 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31621 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31622 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31623 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31624 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31625 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31626 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31627 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31628 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31629 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31630 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31631 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31632 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31633 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31635 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31636 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31637 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31638 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31640 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31641 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31642 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31643 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31644 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31645 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31646 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31647 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31648 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31649 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31650 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31651 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31652 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31653 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31654 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31655 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31656 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31657 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31658 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31659 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31660 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31661 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31662 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31663 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31664 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31665 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31666 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31667 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31668 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31669 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31670 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31671 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31672 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31673 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31675 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31676 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31677 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31679 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31680 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31681 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31682 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31683 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31685 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31687 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31688 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31690 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31691 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31692 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31693 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31695 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31696 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31699 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31701 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31706 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31707 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31709 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31710 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31712 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31720 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31721 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31722 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31743 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31744 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31746 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31748 /* AVX2 */
31749 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31750 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31751 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31752 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31753 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31754 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31755 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31756 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31757 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31758 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31759 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31760 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31761 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31762 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31763 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31764 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31765 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31766 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31767 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31768 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31769 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31770 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31771 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31772 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31773 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31774 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31775 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31776 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31777 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31778 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31779 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31780 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31781 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31782 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31783 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31784 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31785 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31786 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31787 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31788 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31789 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31790 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31791 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31792 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31793 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31794 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31795 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31796 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31797 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31798 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31799 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31800 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31801 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31802 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31803 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31804 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31805 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31806 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31807 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31808 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31809 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31810 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31811 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31812 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31813 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31814 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31815 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31816 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31817 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31818 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31819 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31820 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31821 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31822 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31823 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31824 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31825 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31826 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31827 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31828 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
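  /* The vector shift rows below come in pairs: the ..._SI_COUNT /
     ..._INT_COUNT prototypes take the shift count as a scalar (the
     psllwi/pslldi/psllqi style), while the ..._V8HI_COUNT /
     ..._V4SI_COUNT / ..._V2DI_COUNT prototypes take it in the low part
     of a 128-bit vector operand (the psllw/pslld/psllq style).  */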
31829 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31830 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31831 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31832 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31833 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31834 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31835 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31836 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31837 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31838 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31839 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31840 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31841 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31842 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31843 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31844 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31845 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31846 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31847 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31848 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31849 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31850 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31851 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31852 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31853 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31854 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31855 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31856 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31857 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31858 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31859 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31860 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31861 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31862 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31863 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31864 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31866 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31867 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31868 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31869 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31870 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31871 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31872 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31873 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31874 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31875 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31876 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31877 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31878 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31879 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31880 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31881 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31882 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31892 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31894 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31896 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31898 /* BMI */
31899 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31900 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31901 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31903 /* TBM */
31904 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31905 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31907 /* F16C */
31908 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
31909 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
31910 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
31911 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
31913 /* BMI2 */
31914 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31915 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31916 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31917 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31918 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31919 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31921 /* AVX512F */
31922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
31923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
31924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
31925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
31926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
31927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
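  /* In the AVX512F rows the trailing QImode/HImode prototype argument
     is the per-element mask; in the *_mask variants the vector argument
     just before it is typically the pass-through source that masked-off
     elements are taken from.  */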
31928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
31936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
31938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
31939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
31946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
31948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
31953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
31954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
31955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
31956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
31957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
31958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
31959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
31960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
31977 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
31978 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
31979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
31980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
32010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
32011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
32012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
32013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
32014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
32020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
32021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
32022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
32023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
32024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32025   { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
32081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
32082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
32083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
32084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32088 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32089 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32090 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32091 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
32120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
32121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
32122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32123 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
32125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
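  /* Illustrative sketch (assuming the usual avx512fintrin.h wrapper shape;
     not a quotation of that header): a masked entry such as
     __builtin_ia32_paddd512_mask, typed V16SI_FTYPE_V16SI_V16SI_V16SI_HI
     above, is normally reached through an inline intrinsic that passes the
     two sources, the pass-through vector and the write mask in that order:

	extern __inline __m512i
	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
	_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U,
			       __m512i __A, __m512i __B)
	{
	  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
							 (__v16si) __B,
							 (__v16si) __W,
							 (__mmask16) __U);
	}
  */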
32128 /* Mask arithmetic operations */
32129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
32132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
32136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
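  /* Illustrative sketch (assuming the usual avx512fintrin.h wrapper shape;
     not a quotation of that header): the HI_FTYPE_HI_HI mask operations
     above operate on 16-bit __mmask16 values, e.g.:

	extern __inline __mmask16
	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
	_mm512_kand (__mmask16 __A, __mmask16 __B)
	{
	  return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A,
						    (__mmask16) __B);
	}
  */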
32140 /* SHA */
32141 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32142 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32143 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32144 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32145 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32146 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32147 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
32149 /* AVX512VL. */
32150 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
32151 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
32152 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32153 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32154 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32155 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32156 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32158 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32159 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32160 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32161 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32162 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32163 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32164 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32167 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32168 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32169 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32170 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32174 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32175 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32176 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32177 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32178 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32179 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32180 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32188 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32189 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32190 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32191 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32192 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32193 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32194 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32195 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32196 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32197 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32198 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32199 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32200 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32205 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32206 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
32207 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32208 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32209 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32210 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32211 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32212 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32213 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32214 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32217 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32218 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32219 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32220 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32221 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32222 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32223 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32224 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32225 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32226 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32227 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32228 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32229 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32230 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32231 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32232 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32233 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32234 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32235 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32236 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32237 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32239 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
32241 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32242 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32243 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32244 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32245 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32246 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32247 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32248 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32253 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32254 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32255 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32256 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32257 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32258 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32259 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32260 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32261 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
32264 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32265 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32269 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32275 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32276 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32277 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32278 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32279 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32281 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32285 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32288 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32289 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32290 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32291 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32292 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32293 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32294 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32295 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32296 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32297 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32298 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32299 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32300 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32301 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32302 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32303 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32304 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32305 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32306 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32307 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32308 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32309 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32310 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32311 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32312 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32313 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
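  /* Masked integer add and subtract builtins, including the signed- and
     unsigned-saturating forms.  */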
32314 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32315 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32316 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32317 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32318 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32319 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32320 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32321 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32322 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32323 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32324 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32325 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32326 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32327 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32328 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32329 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32330 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32331 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32334 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32335 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32336 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32337 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32338 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32339 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32342 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32343 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32344 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32345 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32346 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32347 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32348 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32349 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
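  /* Masked down-conversion (truncation) builtins: vpmovwb, vpmovdb, vpmovdw,
     vpmovqb, vpmovqw and vpmovqd, plus their signed- and unsigned-saturating
     variants.  */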
32350 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32351 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32352 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32353 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32354 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32355 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32358 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32359 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32360 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32361 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32362 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32363 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32364 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32365 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32366 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32370 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32371 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32372 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32373 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32386 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32387 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32388 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32389 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32401 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32404 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32405 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32406 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32407 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32408 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32409 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32410 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32411 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32412 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32413 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32414 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32415 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32416 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32417 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32418 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32419 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32420 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32421 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32422 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32423 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32424 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32425 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32426 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32427 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32428 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32429 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32430 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32431 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32441 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32442 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32449 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32450 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32451 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32452 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32456 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32457 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32458 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32465 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32466 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32467 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32468 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32469 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32470 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32471 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32472 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32473 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32474 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32475 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32479 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32480 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32481 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32482 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32483 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32484 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32485 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32486 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32487 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
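  /* Masked fused multiply-add family (fmadd, fmsub, fnmadd, fnmsub,
     fmaddsub, fmsubadd) for the 128-bit and 256-bit vector modes.  */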
32488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32494 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32495 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32496 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32497 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32498 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32499 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32500 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32503 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32504 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32505 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32509 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32510 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32511 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32512 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32513 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32515 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32518 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32519 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32520 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32521 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32525 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32526 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32528 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32532 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32533 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32534 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32535 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32536 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32537 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32538 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32539 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32540 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32541 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
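  /* Masked floating-point to integer conversion builtins.  */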
32542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32544 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32545 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32546 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32547 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32548 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32549 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32550 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32551 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32552 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32553 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32554 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32555 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32556 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32557 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32558 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32560 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32561 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32562 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32563 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32564 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32565 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32566 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32567 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
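  /* Masked two-source variable permute builtins (vpermt2 and vpermi2 forms)
     on doubleword, quadword and floating-point elements.  */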
32568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32571 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32572 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32573 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32574 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32575 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32576 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32577 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32578 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32579 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32580 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32581 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32582 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32583 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32584 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32586 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32587 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32588 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32589 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32590 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32591 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32592 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32593 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32594 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32595 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32596 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32597 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32598 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32599 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32603 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32604 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32606 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32607 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32608 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32609 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32610 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32611 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32612 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32613 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32614 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32616 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32617 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32618 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32619 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32620 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32621 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32622 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32623 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32624 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32625 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32626 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32627 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32628 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32629 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32630 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32631 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32632 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32633 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32634 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32635 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32636 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32637 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32638 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32639 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32640 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32641 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32642 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32643 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32644 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32645 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32646 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32647 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32648 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32649 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32650 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32651 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32652 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32653 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32654 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32655 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32656 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32657 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32658 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32659 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32660 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32661 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32662 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32663 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32664 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32665 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32666 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32667 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32668 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32669 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32670 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32671 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32672 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32673 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32674 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32675 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32676 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32677 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32678 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32679 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32680 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32681 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32682 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32683 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32684 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32685 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32686 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32687 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32688 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32689 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32690 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32691 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32692 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32693 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32694 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32695 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32696 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32697 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32698 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32699 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32700 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32701 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32702 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32703 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32704 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32705 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32706 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32707 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32708 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32709 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32710 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32711 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32712 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32713 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32714 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32716 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32717 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32718 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32719 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32720 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32721 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32722 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32723 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32724 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32725 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32726 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32727 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32728 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32729 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32730 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32731 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32732 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32733 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32734 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32735 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32736 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32737 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32738 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32739 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32740 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32741 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32742 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32743 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32744 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32745 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32746 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32747 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32748 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32749 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32750 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32751 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32752 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32753 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32754 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32755 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32756 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32757 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32758 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32759 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32760 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32761 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32762 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32763 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32764 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32765 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32766 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32767 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32768 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32769 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32770 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32771 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32772 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32773 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32774 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32775 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32776 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32777 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32778 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32779 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32780 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32781 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32782 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32783 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32784 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32785 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32786 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32787 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32788 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32789 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32790 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32791 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32792 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32793 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32794 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32795 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32796 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32797 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32798 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32799 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32800 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32801 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32802 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32803 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32804 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32805 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32806 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32807 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32808 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32809 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32810 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32811 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32812 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32813 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32814 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32815 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32816 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32817 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32818 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32819 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32820 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32821 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32822 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32823 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32824 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32825 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32826 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32827 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32828 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32829 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32830 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32831 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32832 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32833 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32834 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32835 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32836 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32837 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32838 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32839 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32840 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32841 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32842 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32843 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32844 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32845 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32846 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32847 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32848 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32849 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32850 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
32851 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
32852 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32853 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32854 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32855 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32856 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32857 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32858 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32859 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32860 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
32861 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
32863 /* AVX512DQ. */
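  /* As in the entries above, each row lists the ISA option mask(s) required
     to enable the builtin, the insn code used to expand it, the builtin's
     name, its IX86_BUILTIN_* code, the comparison code (UNKNOWN for
     non-comparison builtins) and its prototype recorded as an int.  */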
32864 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32865 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32866 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32867 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32868 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
32869 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
32870 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
32871 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
32872 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
32873 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
32874 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32875 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32876 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32877 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32878 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32879 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32880 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32881 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32882 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32883 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32884 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32885 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
32886 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
32887 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
32888 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
32889 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
32890 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
32891 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
32892 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
32893 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
32894 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
32896 /* AVX512BW. */
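/* AVX512BW vectors hold 32 HImode or 64 QImode elements, so the mask
   operands in the prototypes below are SImode (_SI) or DImode (_DI),
   one mask bit per element.  */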
32897 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
32898 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
32899 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32900 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32901 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32902 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32903 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
32904 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
32905 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32906 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32907 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
32908 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
32909 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
32910 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
32911 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
32912 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
32913 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32914 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32915 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32916 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32917 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32918 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32919 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32920 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32921 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32922 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32923 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32924 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32925 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32926 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32927 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32928 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32929 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32930 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32931 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32932 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32933 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32934 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32935 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32936 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32937 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32938 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32939 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32940 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32941 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32942 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32943 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32944 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32945 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32946 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32947 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32948 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32949 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32950 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32951 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32952 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32953 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
32954 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
32955 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32956 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32957 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32958 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32959 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32960 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32961 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32962 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32963 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32964 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32965 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32966 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32967 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
32968 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
32969 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
32970 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
32971 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32972 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32973 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32974 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32975 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32976 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32977 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32978 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32979 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32980 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32981 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32982 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32983 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32984 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32985 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32986 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32987 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32989 /* AVX512IFMA */
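/* Rows that also OR in OPTION_MASK_ISA_AVX512VL are the 256-bit and
   128-bit variants, which use the AVX512VL vector-length encodings of
   the same instructions.  */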
32990 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32991 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32992 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32993 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32994 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32995 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32996 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32997 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32998 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32999 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33000 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33001 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33003 /* AVX512VBMI */
33004 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33005 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33006 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33007 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33008 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33009 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33010 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33011 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33012 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33013 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33014 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33015 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33016 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33017 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33018 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33019 };
33020
33021 /* Builtins with rounding support. */
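/* Note: each row below supplies, in the field order of struct
   builtin_description defined earlier in this file, the ISA option
   mask, the insn pattern used for expansion, the user-visible
   __builtin_ia32_* name, its IX86_BUILTIN_* enumerator, an rtx
   comparison code (UNKNOWN where unused) and the encoded function
   prototype.  For these rounding-capable builtins the trailing INT in
   the *_INT prototypes is the rounding-mode immediate.  */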
33022 static const struct builtin_description bdesc_round_args[] =
33023 {
33024 /* AVX512F */
33025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
33030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
33031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
33032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
33033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
33034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
33035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
33038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
33040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
33042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
33044 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
33045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
33046 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
33047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
33048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33053 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
33054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
33055 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
33056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33105 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33107 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33109 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33111 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33113 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33115 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33117 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33119 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
33127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
33128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33145 /* AVX512ER */
33146 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33147 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33148 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33149 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33150 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33151 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33152 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33153 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33154 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33155 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33157 /* AVX512DQ. */
33158 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33159 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33160 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33161 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33162 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33163 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33164 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33165 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33166 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33167 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33168 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33169 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33170 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33171 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33172 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33173 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
33174 };
33175
33176 /* Builtins for MPX. */
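/* These entries use (enum insn_code) 0: the MPX builtins are not tied
   to a single named insn pattern and are instead expanded as special
   cases by the builtin expander.  */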
33177 static const struct builtin_description bdesc_mpx[] =
33178 {
33179 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33180 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33181 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33182 };
33183
33184 /* Const builtins for MPX. */
33185 static const struct builtin_description bdesc_mpx_const[] =
33186 {
33187 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33188 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33189 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33190 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33191 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33192 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33193 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33194 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
33195 };
33196
33197 /* FMA4 and XOP. */
33198 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33199 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33200 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33201 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33202 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
33203 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
33204 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
33205 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
33206 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
33207 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
33208 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
33209 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
33210 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
33211 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
33212 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
33213 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
33214 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
33215 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
33216 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
33217 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
33218 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
33219 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
33220 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
33221 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
33222 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
33223 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
33224 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
33225 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
33226 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
33227 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
33228 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
33229 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
33230 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
33231 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
33232 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
33233 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
33234 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
33235 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
33236 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
33237 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
33238 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
33239 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
33240 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
33241 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
33242 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
33243 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
33244 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
33245 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
33246 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
33247 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
33248 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
33249 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
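/* For example, MULTI_ARG_3_SF is simply an alias for
   V4SF_FTYPE_V4SF_V4SF_V4SF, i.e. a builtin taking three V4SFmode
   arguments and returning a V4SFmode result; bdesc_multi_arg below
   records these aliases in its prototype field.  */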
33251 static const struct builtin_description bdesc_multi_arg[] =
33252 {
33253 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33254 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33255 UNKNOWN, (int)MULTI_ARG_3_SF },
33256 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33257 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33258 UNKNOWN, (int)MULTI_ARG_3_DF },
33260 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33261 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33262 UNKNOWN, (int)MULTI_ARG_3_SF },
33263 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33264 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33265 UNKNOWN, (int)MULTI_ARG_3_DF },
33267 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33268 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33269 UNKNOWN, (int)MULTI_ARG_3_SF },
33270 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33271 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33272 UNKNOWN, (int)MULTI_ARG_3_DF },
33273 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33274 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33275 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33276 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33277 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33278 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33280 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33281 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33282 UNKNOWN, (int)MULTI_ARG_3_SF },
33283 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33284 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33285 UNKNOWN, (int)MULTI_ARG_3_DF },
33286 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33287 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33288 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33289 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33290 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33291 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33293 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33294 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33295 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33296 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33297 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
33298 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33299 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33301 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33302 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33303 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33304 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33305 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33306 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33307 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33309 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33311 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33312 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33313 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33314 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33315 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33316 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33317 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33318 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33319 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33320 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33321 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33322 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33324 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33325 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33326 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33327 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33328 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33329 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33330 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33331 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33332 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33333 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33334 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33335 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33336 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33337 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33338 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33339 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33341 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33342 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33343 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33344 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33345 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33346 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33348 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33349 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33350 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33351 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33352 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33353 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33354 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33355 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33356 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33357 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33358 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33359 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33360 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33361 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33362 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33364 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33365 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33366 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33367 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33368 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33369 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33370 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33372 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33373 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33374 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33375 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33376 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33377 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33378 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33380 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33381 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33382 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33383 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33384 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33385 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33386 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33388 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33389 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33390 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33391 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33392 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33393 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33394 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33396 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33397 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33398 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33399 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33400 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33401 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33402 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33404 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33405 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33406 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33407 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33408 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33409 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33410 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33412 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33413 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33414 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33415 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33416 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33417 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33418 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33420 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33421 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33422 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33423 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33424 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33425 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33426 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33428 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33429 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33430 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33431 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33432 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33433 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33434 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33435 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33437 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33438 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33439 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33440 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33441 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33442 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33443 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33444 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33446 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33447 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33448 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33449 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
33453 /* TM vector builtins. */
33455 /* Reuse the existing x86-specific `struct builtin_description' because
33456 we're lazy.  Add casts to make them fit. */
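/* Field by field, each entry below gives: the ISA mask under which the
   builtin is registered, an insn code (CODE_FOR_nothing, since these are
   not expanded from a single insn pattern), the builtin's name, the
   generic BUILT_IN_TM_* code it stands for (cast to ix86_builtins), an
   unused comparison code, and the ix86 function type used to build its
   signature.  For example, the first entry registers "__builtin__ITM_WM64"
   as the 64-bit TM vector store, of type VOID_FTYPE_PV2SI_V2SI, when MMX
   is available.  */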
33457 static const struct builtin_description bdesc_tm[] =
33459 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33460 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33461 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33462 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33463 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33464 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33465 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33467 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33468 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33469 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33470 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33471 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33472 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33473 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33475 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33476 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33477 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33478 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33479 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33480 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33481 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33483 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33484 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33485 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
33488 /* TM callbacks. */
33490 /* Return the builtin decl needed to load a vector of TYPE. */
33492 static tree
33493 ix86_builtin_tm_load (tree type)
33495 if (TREE_CODE (type) == VECTOR_TYPE)
33497 switch (tree_to_uhwi (TYPE_SIZE (type)))
33499 case 64:
33500 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33501 case 128:
33502 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33503 case 256:
33504 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33507 return NULL_TREE;
33510 /* Return the builtin decl needed to store a vector of TYPE. */
33512 static tree
33513 ix86_builtin_tm_store (tree type)
33515 if (TREE_CODE (type) == VECTOR_TYPE)
33517 switch (tree_to_uhwi (TYPE_SIZE (type)))
33519 case 64:
33520 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33521 case 128:
33522 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33523 case 256:
33524 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33527 return NULL_TREE;
33530 /* Initialize the transactional memory vector load/store builtins. */
33532 static void
33533 ix86_init_tm_builtins (void)
33535 enum ix86_builtin_func_type ftype;
33536 const struct builtin_description *d;
33537 size_t i;
33538 tree decl;
33539 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33540 tree attrs_log, attrs_type_log;
33542 if (!flag_tm)
33543 return;
33545 /* If there are no builtins defined, we must be compiling in a
33546 language without trans-mem support. */
33547 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33548 return;
33550 /* Use whatever attributes a normal TM load has. */
33551 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33552 attrs_load = DECL_ATTRIBUTES (decl);
33553 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33554 /* Use whatever attributes a normal TM store has. */
33555 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33556 attrs_store = DECL_ATTRIBUTES (decl);
33557 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33558 /* Use whatever attributes a normal TM log has. */
33559 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33560 attrs_log = DECL_ATTRIBUTES (decl);
33561 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33563 for (i = 0, d = bdesc_tm;
33564 i < ARRAY_SIZE (bdesc_tm);
33565 i++, d++)
33567 if ((d->mask & ix86_isa_flags) != 0
33568 || (lang_hooks.builtin_function
33569 == lang_hooks.builtin_function_ext_scope))
33571 tree type, attrs, attrs_type;
33572 enum built_in_function code = (enum built_in_function) d->code;
33574 ftype = (enum ix86_builtin_func_type) d->flag;
33575 type = ix86_get_builtin_func_type (ftype);
33577 if (BUILTIN_TM_LOAD_P (code))
33579 attrs = attrs_load;
33580 attrs_type = attrs_type_load;
33582 else if (BUILTIN_TM_STORE_P (code))
33584 attrs = attrs_store;
33585 attrs_type = attrs_type_store;
33587 else
33589 attrs = attrs_log;
33590 attrs_type = attrs_type_log;
33592 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33593 /* The builtin name without the prefix, for
33594 calling it directly. */
33595 d->name + strlen ("__builtin_"),
33596 attrs);
33597 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33598 set the TYPE_ATTRIBUTES. */
33599 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33601 set_builtin_decl (code, decl, false);
33606 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
33607 not in the current target ISA, so that the user can compile particular
33608 modules with target-specific options that differ from the command-line
33609 options. */
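/* For illustration (hypothetical user code, not part of this file): a
   translation unit compiled with plain -msse2 may still provide

     __attribute__((target ("avx2")))
     void fill_avx2 (int *p) { ... AVX2 intrinsics ... }

   so the AVX2 builtin declarations must exist even though AVX2 is not in
   the command-line ISA; the ISA mask recorded with each builtin is what
   restricts where it may actually be used.  */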
33610 static void
33611 ix86_init_mmx_sse_builtins (void)
33613 const struct builtin_description * d;
33614 enum ix86_builtin_func_type ftype;
33615 size_t i;
33617 /* Add all special builtins with a variable number of operands. */
33618 for (i = 0, d = bdesc_special_args;
33619 i < ARRAY_SIZE (bdesc_special_args);
33620 i++, d++)
33622 if (d->name == 0)
33623 continue;
33625 ftype = (enum ix86_builtin_func_type) d->flag;
33626 def_builtin (d->mask, d->name, ftype, d->code);
33629 /* Add all builtins with a variable number of operands. */
33630 for (i = 0, d = bdesc_args;
33631 i < ARRAY_SIZE (bdesc_args);
33632 i++, d++)
33634 if (d->name == 0)
33635 continue;
33637 ftype = (enum ix86_builtin_func_type) d->flag;
33638 def_builtin_const (d->mask, d->name, ftype, d->code);
33641 /* Add all builtins with rounding. */
33642 for (i = 0, d = bdesc_round_args;
33643 i < ARRAY_SIZE (bdesc_round_args);
33644 i++, d++)
33646 if (d->name == 0)
33647 continue;
33649 ftype = (enum ix86_builtin_func_type) d->flag;
33650 def_builtin_const (d->mask, d->name, ftype, d->code);
33653 /* pcmpestr[im] insns. */
33654 for (i = 0, d = bdesc_pcmpestr;
33655 i < ARRAY_SIZE (bdesc_pcmpestr);
33656 i++, d++)
33658 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33659 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33660 else
33661 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33662 def_builtin_const (d->mask, d->name, ftype, d->code);
33665 /* pcmpistr[im] insns. */
33666 for (i = 0, d = bdesc_pcmpistr;
33667 i < ARRAY_SIZE (bdesc_pcmpistr);
33668 i++, d++)
33670 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33671 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33672 else
33673 ftype = INT_FTYPE_V16QI_V16QI_INT;
33674 def_builtin_const (d->mask, d->name, ftype, d->code);
33677 /* comi/ucomi insns. */
33678 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33680 if (d->mask == OPTION_MASK_ISA_SSE2)
33681 ftype = INT_FTYPE_V2DF_V2DF;
33682 else
33683 ftype = INT_FTYPE_V4SF_V4SF;
33684 def_builtin_const (d->mask, d->name, ftype, d->code);
33687 /* SSE */
33688 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33689 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33690 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33691 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33693 /* SSE or 3DNow!A */
33694 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33695 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33696 IX86_BUILTIN_MASKMOVQ);
33698 /* SSE2 */
33699 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33700 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33702 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33703 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33704 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33705 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33707 /* SSE3. */
33708 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33709 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33710 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33711 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33713 /* AES */
33714 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33715 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33716 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33717 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33718 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33719 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33720 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33721 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33722 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33723 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33724 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33725 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33727 /* PCLMUL */
33728 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33729 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33731 /* RDRND */
33732 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33733 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33734 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33735 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33736 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33737 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33738 IX86_BUILTIN_RDRAND64_STEP);
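/* Illustrative use (hypothetical user code): each *_step builtin stores a
   random value through its pointer argument and returns nonzero on
   success, so callers typically retry in a loop:

     unsigned int r;
     while (!__builtin_ia32_rdrand32_step (&r))
       continue;  */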
33740 /* AVX2 */
33741 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33742 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33743 IX86_BUILTIN_GATHERSIV2DF);
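/* The gather function types read as (source, base pointer, index vector,
   mask, scale), matching the operand order of the _mm_mask_i32gather_pd
   family of intrinsics; V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT above is
   therefore the 128-bit double-precision gather with 32-bit indices.  */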
33745 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33746 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33747 IX86_BUILTIN_GATHERSIV4DF);
33749 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33750 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33751 IX86_BUILTIN_GATHERDIV2DF);
33753 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33754 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33755 IX86_BUILTIN_GATHERDIV4DF);
33757 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33758 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33759 IX86_BUILTIN_GATHERSIV4SF);
33761 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33762 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33763 IX86_BUILTIN_GATHERSIV8SF);
33765 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33766 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33767 IX86_BUILTIN_GATHERDIV4SF);
33769 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33770 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33771 IX86_BUILTIN_GATHERDIV8SF);
33773 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33774 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33775 IX86_BUILTIN_GATHERSIV2DI);
33777 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33778 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33779 IX86_BUILTIN_GATHERSIV4DI);
33781 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33782 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33783 IX86_BUILTIN_GATHERDIV2DI);
33785 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33786 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33787 IX86_BUILTIN_GATHERDIV4DI);
33789 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33790 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33791 IX86_BUILTIN_GATHERSIV4SI);
33793 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33794 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33795 IX86_BUILTIN_GATHERSIV8SI);
33797 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33798 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33799 IX86_BUILTIN_GATHERDIV4SI);
33801 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33802 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33803 IX86_BUILTIN_GATHERDIV8SI);
33805 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33806 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33807 IX86_BUILTIN_GATHERALTSIV4DF);
33809 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33810 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33811 IX86_BUILTIN_GATHERALTDIV8SF);
33813 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33814 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33815 IX86_BUILTIN_GATHERALTSIV4DI);
33817 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33818 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33819 IX86_BUILTIN_GATHERALTDIV8SI);
33821 /* AVX512F */
33822 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33823 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33824 IX86_BUILTIN_GATHER3SIV16SF);
33826 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33827 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33828 IX86_BUILTIN_GATHER3SIV8DF);
33830 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33831 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33832 IX86_BUILTIN_GATHER3DIV16SF);
33834 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33835 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33836 IX86_BUILTIN_GATHER3DIV8DF);
33838 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33839 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33840 IX86_BUILTIN_GATHER3SIV16SI);
33842 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33843 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33844 IX86_BUILTIN_GATHER3SIV8DI);
33846 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33847 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33848 IX86_BUILTIN_GATHER3DIV16SI);
33850 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33851 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33852 IX86_BUILTIN_GATHER3DIV8DI);
33854 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
33855 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33856 IX86_BUILTIN_GATHER3ALTSIV8DF);
33858 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
33859 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33860 IX86_BUILTIN_GATHER3ALTDIV16SF);
33862 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
33863 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33864 IX86_BUILTIN_GATHER3ALTSIV8DI);
33866 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
33867 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33868 IX86_BUILTIN_GATHER3ALTDIV16SI);
33870 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33871 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33872 IX86_BUILTIN_SCATTERSIV16SF);
33874 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33875 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33876 IX86_BUILTIN_SCATTERSIV8DF);
33878 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33879 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33880 IX86_BUILTIN_SCATTERDIV16SF);
33882 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33883 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33884 IX86_BUILTIN_SCATTERDIV8DF);
33886 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33887 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33888 IX86_BUILTIN_SCATTERSIV16SI);
33890 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33891 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33892 IX86_BUILTIN_SCATTERSIV8DI);
33894 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
33895 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
33896 IX86_BUILTIN_SCATTERDIV16SI);
33898 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
33899 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
33900 IX86_BUILTIN_SCATTERDIV8DI);
33902 /* AVX512VL */
33903 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
33904 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
33905 IX86_BUILTIN_GATHER3SIV2DF);
33907 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
33908 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
33909 IX86_BUILTIN_GATHER3SIV4DF);
33911 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
33912 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
33913 IX86_BUILTIN_GATHER3DIV2DF);
33915 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
33916 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
33917 IX86_BUILTIN_GATHER3DIV4DF);
33919 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
33920 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
33921 IX86_BUILTIN_GATHER3SIV4SF);
33923 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
33924 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
33925 IX86_BUILTIN_GATHER3SIV8SF);
33927 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
33928 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
33929 IX86_BUILTIN_GATHER3DIV4SF);
33931 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
33932 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
33933 IX86_BUILTIN_GATHER3DIV8SF);
33935 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
33936 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
33937 IX86_BUILTIN_GATHER3SIV2DI);
33939 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
33940 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
33941 IX86_BUILTIN_GATHER3SIV4DI);
33943 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
33944 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
33945 IX86_BUILTIN_GATHER3DIV2DI);
33947 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
33948 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
33949 IX86_BUILTIN_GATHER3DIV4DI);
33951 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
33952 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
33953 IX86_BUILTIN_GATHER3SIV4SI);
33955 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
33956 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
33957 IX86_BUILTIN_GATHER3SIV8SI);
33959 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
33960 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
33961 IX86_BUILTIN_GATHER3DIV4SI);
33963 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
33964 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
33965 IX86_BUILTIN_GATHER3DIV8SI);
33967 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
33968 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
33969 IX86_BUILTIN_GATHER3ALTSIV4DF);
33971 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
33972 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
33973 IX86_BUILTIN_GATHER3ALTDIV8SF);
33975 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
33976 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
33977 IX86_BUILTIN_GATHER3ALTSIV4DI);
33979 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
33980 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
33981 IX86_BUILTIN_GATHER3ALTDIV8SI);
33983 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
33984 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
33985 IX86_BUILTIN_SCATTERSIV8SF);
33987 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
33988 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
33989 IX86_BUILTIN_SCATTERSIV4SF);
33991 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
33992 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
33993 IX86_BUILTIN_SCATTERSIV4DF);
33995 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
33996 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
33997 IX86_BUILTIN_SCATTERSIV2DF);
33999 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
34000 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
34001 IX86_BUILTIN_SCATTERDIV8SF);
34003 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
34004 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
34005 IX86_BUILTIN_SCATTERDIV4SF);
34007 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
34008 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
34009 IX86_BUILTIN_SCATTERDIV4DF);
34011 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
34012 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
34013 IX86_BUILTIN_SCATTERDIV2DF);
34015 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
34016 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
34017 IX86_BUILTIN_SCATTERSIV8SI);
34019 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
34020 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
34021 IX86_BUILTIN_SCATTERSIV4SI);
34023 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
34024 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
34025 IX86_BUILTIN_SCATTERSIV4DI);
34027 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
34028 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
34029 IX86_BUILTIN_SCATTERSIV2DI);
34031 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
34032 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
34033 IX86_BUILTIN_SCATTERDIV8SI);
34035 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
34036 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
34037 IX86_BUILTIN_SCATTERDIV4SI);
34039 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
34040 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
34041 IX86_BUILTIN_SCATTERDIV4DI);
34043 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
34044 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
34045 IX86_BUILTIN_SCATTERDIV2DI);
34047 /* AVX512PF */
34048 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
34049 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34050 IX86_BUILTIN_GATHERPFDPD);
34051 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
34052 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34053 IX86_BUILTIN_GATHERPFDPS);
34054 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
34055 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34056 IX86_BUILTIN_GATHERPFQPD);
34057 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
34058 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34059 IX86_BUILTIN_GATHERPFQPS);
34060 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
34061 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34062 IX86_BUILTIN_SCATTERPFDPD);
34063 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
34064 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34065 IX86_BUILTIN_SCATTERPFDPS);
34066 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
34067 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34068 IX86_BUILTIN_SCATTERPFQPD);
34069 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
34070 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34071 IX86_BUILTIN_SCATTERPFQPS);
34073 /* SHA */
34074 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
34075 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
34076 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
34077 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
34078 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
34079 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
34080 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
34081 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
34082 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
34083 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
34084 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
34085 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
34086 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
34087 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
34089 /* RTM. */
34090 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
34091 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
34093 /* MMX access to the vec_init patterns. */
34094 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
34095 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
34097 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
34098 V4HI_FTYPE_HI_HI_HI_HI,
34099 IX86_BUILTIN_VEC_INIT_V4HI);
34101 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
34102 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
34103 IX86_BUILTIN_VEC_INIT_V8QI);
34105 /* Access to the vec_extract patterns. */
34106 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
34107 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
34108 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
34109 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
34110 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
34111 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
34112 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
34113 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
34114 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
34115 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
34117 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34118 "__builtin_ia32_vec_ext_v4hi",
34119 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
34121 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
34122 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
34124 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
34125 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
34127 /* Access to the vec_set patterns. */
34128 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
34129 "__builtin_ia32_vec_set_v2di",
34130 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
34132 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
34133 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
34135 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
34136 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
34138 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
34139 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
34141 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34142 "__builtin_ia32_vec_set_v4hi",
34143 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
34145 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
34146 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
34148 /* RDSEED */
34149 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
34150 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
34151 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
34152 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
34153 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
34154 "__builtin_ia32_rdseed_di_step",
34155 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
34157 /* ADCX */
34158 def_builtin (0, "__builtin_ia32_addcarryx_u32",
34159 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34160 def_builtin (OPTION_MASK_ISA_64BIT,
34161 "__builtin_ia32_addcarryx_u64",
34162 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34163 IX86_BUILTIN_ADDCARRYX64);
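/* Illustrative use from user code (hypothetical variables a and b):

     unsigned int sum;
     unsigned char carry_out
       = __builtin_ia32_addcarryx_u32 (0, a, b, &sum);

   i.e. carry-in first, then the two addends, then a pointer receiving the
   low 32 bits of the sum, with the carry-out returned.  */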
34165 /* SBB */
34166 def_builtin (0, "__builtin_ia32_sbb_u32",
34167 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34168 def_builtin (OPTION_MASK_ISA_64BIT,
34169 "__builtin_ia32_sbb_u64",
34170 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34171 IX86_BUILTIN_SBB64);
34173 /* Read/write FLAGS. */
34174 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34175 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34176 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34177 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34178 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34179 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34180 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34181 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
34183 /* CLFLUSHOPT. */
34184 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34185 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34187 /* CLWB. */
34188 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34189 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34191 /* Add FMA4 multi-arg instructions.  */
34192 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34194 if (d->name == 0)
34195 continue;
34197 ftype = (enum ix86_builtin_func_type) d->flag;
34198 def_builtin_const (d->mask, d->name, ftype, d->code);
34202 static void
34203 ix86_init_mpx_builtins ()
34205 const struct builtin_description * d;
34206 enum ix86_builtin_func_type ftype;
34207 tree decl;
34208 size_t i;
34210 for (i = 0, d = bdesc_mpx;
34211 i < ARRAY_SIZE (bdesc_mpx);
34212 i++, d++)
34214 if (d->name == 0)
34215 continue;
34217 ftype = (enum ix86_builtin_func_type) d->flag;
34218 decl = def_builtin (d->mask, d->name, ftype, d->code);
34220 /* Without the leaf and nothrow flags on MPX builtins,
34221 abnormal edges may follow their calls when setjmp
34222 is present in the function.  Since there may be many
34223 MPX builtin calls, this causes lots of useless
34224 edges and enormous PHI nodes.  To avoid this, mark
34225 MPX builtins as leaf and nothrow. */
34226 if (decl)
34228 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34229 NULL_TREE);
34230 TREE_NOTHROW (decl) = 1;
34232 else
34234 ix86_builtins_isa[(int)d->code].leaf_p = true;
34235 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34239 for (i = 0, d = bdesc_mpx_const;
34240 i < ARRAY_SIZE (bdesc_mpx_const);
34241 i++, d++)
34243 if (d->name == 0)
34244 continue;
34246 ftype = (enum ix86_builtin_func_type) d->flag;
34247 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34249 if (decl)
34251 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34252 NULL_TREE);
34253 TREE_NOTHROW (decl) = 1;
34255 else
34257 ix86_builtins_isa[(int)d->code].leaf_p = true;
34258 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34263 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34264 to return a pointer to VERSION_DECL if the outcome of the expression
34265 formed by PREDICATE_CHAIN is true. This function will be called during
34266 version dispatch to decide which function version to execute. It returns
34267 the basic block at the end, to which more conditions can be added. */
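/* Roughly, for a single (predicate, argument) pair the block ends up
   holding GIMPLE of the following shape (a sketch only; the predicate is
   typically __builtin_cpu_is or __builtin_cpu_supports):

     cond = PREDICATE (ARG);
     if (cond > 0)
       return (void *) &VERSION_DECL;

   with the false edge leading to the basic block this function returns.
   Chained predicates are combined with MIN_EXPR before the comparison.  */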
34269 static basic_block
34270 add_condition_to_bb (tree function_decl, tree version_decl,
34271 tree predicate_chain, basic_block new_bb)
34273 gimple return_stmt;
34274 tree convert_expr, result_var;
34275 gimple convert_stmt;
34276 gimple call_cond_stmt;
34277 gimple if_else_stmt;
34279 basic_block bb1, bb2, bb3;
34280 edge e12, e23;
34282 tree cond_var, and_expr_var = NULL_TREE;
34283 gimple_seq gseq;
34285 tree predicate_decl, predicate_arg;
34287 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34289 gcc_assert (new_bb != NULL);
34290 gseq = bb_seq (new_bb);
34293 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34294 build_fold_addr_expr (version_decl));
34295 result_var = create_tmp_var (ptr_type_node);
34296 convert_stmt = gimple_build_assign (result_var, convert_expr);
34297 return_stmt = gimple_build_return (result_var);
34299 if (predicate_chain == NULL_TREE)
34301 gimple_seq_add_stmt (&gseq, convert_stmt);
34302 gimple_seq_add_stmt (&gseq, return_stmt);
34303 set_bb_seq (new_bb, gseq);
34304 gimple_set_bb (convert_stmt, new_bb);
34305 gimple_set_bb (return_stmt, new_bb);
34306 pop_cfun ();
34307 return new_bb;
34310 while (predicate_chain != NULL)
34312 cond_var = create_tmp_var (integer_type_node);
34313 predicate_decl = TREE_PURPOSE (predicate_chain);
34314 predicate_arg = TREE_VALUE (predicate_chain);
34315 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34316 gimple_call_set_lhs (call_cond_stmt, cond_var);
34318 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34319 gimple_set_bb (call_cond_stmt, new_bb);
34320 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34322 predicate_chain = TREE_CHAIN (predicate_chain);
34324 if (and_expr_var == NULL)
34325 and_expr_var = cond_var;
34326 else
34328 gimple assign_stmt;
34329 /* Use MIN_EXPR to compute the logical AND of the conditions (zero if
34330 any condition is zero): and_expr_var = min_expr <cond_var, and_expr_var>. */
34331 assign_stmt = gimple_build_assign (and_expr_var,
34332 build2 (MIN_EXPR, integer_type_node,
34333 cond_var, and_expr_var));
34335 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34336 gimple_set_bb (assign_stmt, new_bb);
34337 gimple_seq_add_stmt (&gseq, assign_stmt);
34341 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34342 integer_zero_node,
34343 NULL_TREE, NULL_TREE);
34344 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34345 gimple_set_bb (if_else_stmt, new_bb);
34346 gimple_seq_add_stmt (&gseq, if_else_stmt);
34348 gimple_seq_add_stmt (&gseq, convert_stmt);
34349 gimple_seq_add_stmt (&gseq, return_stmt);
34350 set_bb_seq (new_bb, gseq);
34352 bb1 = new_bb;
34353 e12 = split_block (bb1, if_else_stmt);
34354 bb2 = e12->dest;
34355 e12->flags &= ~EDGE_FALLTHRU;
34356 e12->flags |= EDGE_TRUE_VALUE;
34358 e23 = split_block (bb2, return_stmt);
34360 gimple_set_bb (convert_stmt, bb2);
34361 gimple_set_bb (return_stmt, bb2);
34363 bb3 = e23->dest;
34364 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34366 remove_edge (e23);
34367 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34369 pop_cfun ();
34371 return bb3;
34374 /* This parses the attribute arguments to target in DECL and determines
34375 the right builtin to use to match the platform specification.
34376 It returns the priority value for this version decl. If PREDICATE_LIST
34377 is not NULL, it stores the list of cpu features that need to be checked
34378 before dispatching this function. */
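/* A sketch of how the priorities interact (illustrative declarations
   only, not part of this file):

     __attribute__((target ("default"))) int foo (void);
     __attribute__((target ("sse4.2"))) int foo (void);
     __attribute__((target ("arch=core2"))) int foo (void);

   the "sse4.2" version gets priority P_SSE4_2 and the "arch=core2"
   version P_PROC_SSSE3, which is lower, so the sse4.2 body is tested for
   dispatch before the core2 one, and "default" is the final fallback.  */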
34380 static unsigned int
34381 get_builtin_code_for_version (tree decl, tree *predicate_list)
34383 tree attrs;
34384 struct cl_target_option cur_target;
34385 tree target_node;
34386 struct cl_target_option *new_target;
34387 const char *arg_str = NULL;
34388 const char *attrs_str = NULL;
34389 char *tok_str = NULL;
34390 char *token;
34392 /* Priority of i386 features; a greater value means a higher priority.  This is
34393 used to decide the order in which function dispatch must happen. For
34394 instance, a version specialized for SSE4.2 should be checked for dispatch
34395 before a version for SSE3, as SSE4.2 implies SSE3. */
34396 enum feature_priority
34398 P_ZERO = 0,
34399 P_MMX,
34400 P_SSE,
34401 P_SSE2,
34402 P_SSE3,
34403 P_SSSE3,
34404 P_PROC_SSSE3,
34405 P_SSE4_A,
34406 P_PROC_SSE4_A,
34407 P_SSE4_1,
34408 P_SSE4_2,
34409 P_PROC_SSE4_2,
34410 P_POPCNT,
34411 P_AVX,
34412 P_PROC_AVX,
34413 P_BMI,
34414 P_PROC_BMI,
34415 P_FMA4,
34416 P_XOP,
34417 P_PROC_XOP,
34418 P_FMA,
34419 P_PROC_FMA,
34420 P_BMI2,
34421 P_AVX2,
34422 P_PROC_AVX2,
34423 P_AVX512F,
34424 P_PROC_AVX512F
34427 enum feature_priority priority = P_ZERO;
34429 /* These are the target attribute strings for which a dispatcher is
34430 available, from fold_builtin_cpu. */
34432 static struct _feature_list
34434 const char *const name;
34435 const enum feature_priority priority;
34437 const feature_list[] =
34439 {"mmx", P_MMX},
34440 {"sse", P_SSE},
34441 {"sse2", P_SSE2},
34442 {"sse3", P_SSE3},
34443 {"sse4a", P_SSE4_A},
34444 {"ssse3", P_SSSE3},
34445 {"sse4.1", P_SSE4_1},
34446 {"sse4.2", P_SSE4_2},
34447 {"popcnt", P_POPCNT},
34448 {"avx", P_AVX},
34449 {"bmi", P_BMI},
34450 {"fma4", P_FMA4},
34451 {"xop", P_XOP},
34452 {"fma", P_FMA},
34453 {"bmi2", P_BMI2},
34454 {"avx2", P_AVX2},
34455 {"avx512f", P_AVX512F}
34459 static unsigned int NUM_FEATURES
34460 = sizeof (feature_list) / sizeof (struct _feature_list);
34462 unsigned int i;
34464 tree predicate_chain = NULL_TREE;
34465 tree predicate_decl, predicate_arg;
34467 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34468 gcc_assert (attrs != NULL);
34470 attrs = TREE_VALUE (TREE_VALUE (attrs));
34472 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34473 attrs_str = TREE_STRING_POINTER (attrs);
34475 /* Return priority zero for default function. */
34476 if (strcmp (attrs_str, "default") == 0)
34477 return 0;
34479 /* Handle arch= if specified. For priority, set it to be 1 more than
34480 the best instruction set the processor can handle. For instance, if
34481 there is a version for atom and a version for ssse3 (the highest ISA
34482 priority for atom), the atom version must be checked for dispatch
34483 before the ssse3 version. */
34484 if (strstr (attrs_str, "arch=") != NULL)
34486 cl_target_option_save (&cur_target, &global_options);
34487 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34488 &global_options_set);
34490 gcc_assert (target_node);
34491 new_target = TREE_TARGET_OPTION (target_node);
34492 gcc_assert (new_target);
34494 if (new_target->arch_specified && new_target->arch > 0)
34496 switch (new_target->arch)
34498 case PROCESSOR_CORE2:
34499 arg_str = "core2";
34500 priority = P_PROC_SSSE3;
34501 break;
34502 case PROCESSOR_NEHALEM:
34503 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34504 arg_str = "westmere";
34505 else
34506 /* We translate "arch=corei7" and "arch=nehalem" to
34507 "corei7" so that it will be mapped to M_INTEL_COREI7
34508 as cpu type to cover all M_INTEL_COREI7_XXXs. */
34509 arg_str = "corei7";
34510 priority = P_PROC_SSE4_2;
34511 break;
34512 case PROCESSOR_SANDYBRIDGE:
34513 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34514 arg_str = "ivybridge";
34515 else
34516 arg_str = "sandybridge";
34517 priority = P_PROC_AVX;
34518 break;
34519 case PROCESSOR_HASWELL:
34520 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34521 arg_str = "broadwell";
34522 else
34523 arg_str = "haswell";
34524 priority = P_PROC_AVX2;
34525 break;
34526 case PROCESSOR_BONNELL:
34527 arg_str = "bonnell";
34528 priority = P_PROC_SSSE3;
34529 break;
34530 case PROCESSOR_KNL:
34531 arg_str = "knl";
34532 priority = P_PROC_AVX512F;
34533 break;
34534 case PROCESSOR_SILVERMONT:
34535 arg_str = "silvermont";
34536 priority = P_PROC_SSE4_2;
34537 break;
34538 case PROCESSOR_AMDFAM10:
34539 arg_str = "amdfam10h";
34540 priority = P_PROC_SSE4_A;
34541 break;
34542 case PROCESSOR_BTVER1:
34543 arg_str = "btver1";
34544 priority = P_PROC_SSE4_A;
34545 break;
34546 case PROCESSOR_BTVER2:
34547 arg_str = "btver2";
34548 priority = P_PROC_BMI;
34549 break;
34550 case PROCESSOR_BDVER1:
34551 arg_str = "bdver1";
34552 priority = P_PROC_XOP;
34553 break;
34554 case PROCESSOR_BDVER2:
34555 arg_str = "bdver2";
34556 priority = P_PROC_FMA;
34557 break;
34558 case PROCESSOR_BDVER3:
34559 arg_str = "bdver3";
34560 priority = P_PROC_FMA;
34561 break;
34562 case PROCESSOR_BDVER4:
34563 arg_str = "bdver4";
34564 priority = P_PROC_AVX2;
34565 break;
34569 cl_target_option_restore (&global_options, &cur_target);
34571 if (predicate_list && arg_str == NULL)
34573 error_at (DECL_SOURCE_LOCATION (decl),
34574 "No dispatcher found for the versioning attributes");
34575 return 0;
34578 if (predicate_list)
34580 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34581 /* For a C string literal the length includes the trailing NULL. */
34582 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34583 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34584 predicate_chain);
34588 /* Process feature name. */
34589 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34590 strcpy (tok_str, attrs_str);
34591 token = strtok (tok_str, ",");
34592 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34594 while (token != NULL)
34596 /* Do not process "arch=" */
34597 if (strncmp (token, "arch=", 5) == 0)
34599 token = strtok (NULL, ",");
34600 continue;
34602 for (i = 0; i < NUM_FEATURES; ++i)
34604 if (strcmp (token, feature_list[i].name) == 0)
34606 if (predicate_list)
34608 predicate_arg = build_string_literal (
34609 strlen (feature_list[i].name) + 1,
34610 feature_list[i].name);
34611 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34612 predicate_chain);
34614 /* Find the maximum priority feature. */
34615 if (feature_list[i].priority > priority)
34616 priority = feature_list[i].priority;
34618 break;
34621 if (predicate_list && i == NUM_FEATURES)
34623 error_at (DECL_SOURCE_LOCATION (decl),
34624 "No dispatcher found for %s", token);
34625 return 0;
34627 token = strtok (NULL, ",");
34629 free (tok_str);
34631 if (predicate_list && predicate_chain == NULL_TREE)
34633 error_at (DECL_SOURCE_LOCATION (decl),
34634 "No dispatcher found for the versioning attributes : %s",
34635 attrs_str);
34636 return 0;
34638 else if (predicate_list)
34640 predicate_chain = nreverse (predicate_chain);
34641 *predicate_list = predicate_chain;
34644 return priority;
34647 /* This compares the priority of target features in function DECL1
34648 and DECL2. It returns positive value if DECL1 is higher priority,
34649 negative value if DECL2 is higher priority and 0 if they are the
34650 same. */
34652 static int
34653 ix86_compare_version_priority (tree decl1, tree decl2)
34655 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34656 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34658 return (int)priority1 - (int)priority2;
34661 /* V1 and V2 point to function versions with different priorities
34662 based on the target ISA. This function compares their priorities. */
34664 static int
34665 feature_compare (const void *v1, const void *v2)
34667 typedef struct _function_version_info
34669 tree version_decl;
34670 tree predicate_chain;
34671 unsigned int dispatch_priority;
34672 } function_version_info;
34674 const function_version_info c1 = *(const function_version_info *)v1;
34675 const function_version_info c2 = *(const function_version_info *)v2;
34676 return (c2.dispatch_priority - c1.dispatch_priority);
34679 /* This function generates the dispatch function for
34680 multi-versioned functions. DISPATCH_DECL is the function which will
34681 contain the dispatch logic. FNDECLS are the function choices for
34682 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34683 in DISPATCH_DECL in which the dispatch code is generated. */
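/* A sketch of the resolver body this produces, assuming versions of a
   hypothetical function foo for arch=core2 and ssse3 plus a default
   (placeholder names stand for the version decls):

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("core2"))
       return &foo_arch_core2;
     if (__builtin_cpu_supports ("ssse3"))
       return &foo_ssse3;
     return &foo_default;

   The conditions come from the predicate chains built by
   get_builtin_code_for_version and are emitted via add_condition_to_bb;
   the default version is dispatched last. */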
34685 static int
34686 dispatch_function_versions (tree dispatch_decl,
34687 void *fndecls_p,
34688 basic_block *empty_bb)
34690 tree default_decl;
34691 gimple ifunc_cpu_init_stmt;
34692 gimple_seq gseq;
34693 int ix;
34694 tree ele;
34695 vec<tree> *fndecls;
34696 unsigned int num_versions = 0;
34697 unsigned int actual_versions = 0;
34698 unsigned int i;
34700 struct _function_version_info
34702 tree version_decl;
34703 tree predicate_chain;
34704 unsigned int dispatch_priority;
34705 }*function_version_info;
34707 gcc_assert (dispatch_decl != NULL
34708 && fndecls_p != NULL
34709 && empty_bb != NULL);
34711 /* fndecls_p is actually a vector. */
34712 fndecls = static_cast<vec<tree> *> (fndecls_p);
34714 /* At least one more version other than the default. */
34715 num_versions = fndecls->length ();
34716 gcc_assert (num_versions >= 2);
34718 function_version_info = (struct _function_version_info *)
34719 XNEWVEC (struct _function_version_info, (num_versions - 1));
34721 /* The first version in the vector is the default decl. */
34722 default_decl = (*fndecls)[0];
34724 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34726 gseq = bb_seq (*empty_bb);
34727 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34728 constructors, so explicitly call __builtin_cpu_init here. */
34729 ifunc_cpu_init_stmt = gimple_build_call_vec (
34730 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34731 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34732 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34733 set_bb_seq (*empty_bb, gseq);
34735 pop_cfun ();
34738 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34740 tree version_decl = ele;
34741 tree predicate_chain = NULL_TREE;
34742 unsigned int priority;
34743 /* Get attribute string, parse it and find the right predicate decl.
34744 The predicate function could be a lengthy combination of many
34745 features, like arch-type and various isa-variants. */
34746 priority = get_builtin_code_for_version (version_decl,
34747 &predicate_chain);
34749 if (predicate_chain == NULL_TREE)
34750 continue;
34752 function_version_info [actual_versions].version_decl = version_decl;
34753 function_version_info [actual_versions].predicate_chain
34754 = predicate_chain;
34755 function_version_info [actual_versions].dispatch_priority = priority;
34756 actual_versions++;
34759 /* Sort the versions according to descending order of dispatch priority. The
34760 priority is based on the ISA. This is not a perfect solution. There
34761 could still be ambiguity. If more than one function version is suitable
34762 to execute, which one should be dispatched? In future, allow the user
34763 to specify a dispatch priority next to the version. */
34764 qsort (function_version_info, actual_versions,
34765 sizeof (struct _function_version_info), feature_compare);
34767 for (i = 0; i < actual_versions; ++i)
34768 *empty_bb = add_condition_to_bb (dispatch_decl,
34769 function_version_info[i].version_decl,
34770 function_version_info[i].predicate_chain,
34771 *empty_bb);
34773 /* Dispatch the default version at the end. */
34774 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34775 NULL, *empty_bb);
34777 free (function_version_info);
34778 return 0;
34781 /* Comparator function to be used in qsort routine to sort attribute
34782 specification strings to "target". */
34784 static int
34785 attr_strcmp (const void *v1, const void *v2)
34787 const char *c1 = *(char *const*)v1;
34788 const char *c2 = *(char *const*)v2;
34789 return strcmp (c1, c2);
34792 /* ARGLIST is the argument to target attribute. This function tokenizes
34793 the comma separated arguments, sorts them and returns a string which
34794 is a unique identifier for the comma separated arguments. It also
34795 replaces non-identifier characters "=,-" with "_". */
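/* For example, a hypothetical attribute target ("sse4.2,arch=core2")
   yields the sorted identifier "arch_core2_sse4.2". */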
34797 static char *
34798 sorted_attr_string (tree arglist)
34800 tree arg;
34801 size_t str_len_sum = 0;
34802 char **args = NULL;
34803 char *attr_str, *ret_str;
34804 char *attr = NULL;
34805 unsigned int argnum = 1;
34806 unsigned int i;
34808 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34810 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34811 size_t len = strlen (str);
34812 str_len_sum += len + 1;
34813 if (arg != arglist)
34814 argnum++;
34815 for (i = 0; i < strlen (str); i++)
34816 if (str[i] == ',')
34817 argnum++;
34820 attr_str = XNEWVEC (char, str_len_sum);
34821 str_len_sum = 0;
34822 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34824 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34825 size_t len = strlen (str);
34826 memcpy (attr_str + str_len_sum, str, len);
34827 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34828 str_len_sum += len + 1;
34831 /* Replace "=,-" with "_". */
34832 for (i = 0; i < strlen (attr_str); i++)
34833 if (attr_str[i] == '=' || attr_str[i]== '-')
34834 attr_str[i] = '_';
34836 if (argnum == 1)
34837 return attr_str;
34839 args = XNEWVEC (char *, argnum);
34841 i = 0;
34842 attr = strtok (attr_str, ",");
34843 while (attr != NULL)
34845 args[i] = attr;
34846 i++;
34847 attr = strtok (NULL, ",");
34850 qsort (args, argnum, sizeof (char *), attr_strcmp);
34852 ret_str = XNEWVEC (char, str_len_sum);
34853 str_len_sum = 0;
34854 for (i = 0; i < argnum; i++)
34856 size_t len = strlen (args[i]);
34857 memcpy (ret_str + str_len_sum, args[i], len);
34858 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34859 str_len_sum += len + 1;
34862 XDELETEVEC (args);
34863 XDELETEVEC (attr_str);
34864 return ret_str;
34867 /* This function changes the assembler name for functions that are
34868 versions. If DECL is a function version and has a "target"
34869 attribute, it appends the attribute string to its assembler name. */
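/* For example, a version of a hypothetical function foo declared with
   __attribute__ ((target ("avx"))) gets the assembler name "foo.avx";
   the version carrying target ("default") keeps its original name. */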
34871 static tree
34872 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34874 tree version_attr;
34875 const char *orig_name, *version_string;
34876 char *attr_str, *assembler_name;
34878 if (DECL_DECLARED_INLINE_P (decl)
34879 && lookup_attribute ("gnu_inline",
34880 DECL_ATTRIBUTES (decl)))
34881 error_at (DECL_SOURCE_LOCATION (decl),
34882 "Function versions cannot be marked as gnu_inline,"
34883 " bodies have to be generated");
34885 if (DECL_VIRTUAL_P (decl)
34886 || DECL_VINDEX (decl))
34887 sorry ("Virtual function multiversioning not supported");
34889 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34891 /* target attribute string cannot be NULL. */
34892 gcc_assert (version_attr != NULL_TREE);
34894 orig_name = IDENTIFIER_POINTER (id);
34895 version_string
34896 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
34898 if (strcmp (version_string, "default") == 0)
34899 return id;
34901 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
34902 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
34904 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
34906 /* Allow assembler name to be modified if already set. */
34907 if (DECL_ASSEMBLER_NAME_SET_P (decl))
34908 SET_DECL_RTL (decl, NULL);
34910 tree ret = get_identifier (assembler_name);
34911 XDELETEVEC (attr_str);
34912 XDELETEVEC (assembler_name);
34913 return ret;
34916 /* This function returns true if FN1 and FN2 are versions of the same function,
34917 that is, the target strings of the function decls are different. This assumes
34918 that FN1 and FN2 have the same signature. */
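/* For example, two declarations of a hypothetical foo carrying
   target ("avx") and target ("sse4.2") respectively are versions of each
   other; two declarations that both carry target ("avx") are not. */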
34920 static bool
34921 ix86_function_versions (tree fn1, tree fn2)
34923 tree attr1, attr2;
34924 char *target1, *target2;
34925 bool result;
34927 if (TREE_CODE (fn1) != FUNCTION_DECL
34928 || TREE_CODE (fn2) != FUNCTION_DECL)
34929 return false;
34931 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
34932 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
34934 /* At least one function decl should have the target attribute specified. */
34935 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
34936 return false;
34938 /* Diagnose missing target attribute if one of the decls is already
34939 multi-versioned. */
34940 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
34942 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
34944 if (attr2 != NULL_TREE)
34946 tree tem = fn1;
34947 fn1 = fn2;
34948 fn2 = tem;
34949 attr1 = attr2;
34951 error_at (DECL_SOURCE_LOCATION (fn2),
34952 "missing %<target%> attribute for multi-versioned %D",
34953 fn2);
34954 inform (DECL_SOURCE_LOCATION (fn1),
34955 "previous declaration of %D", fn1);
34956 /* Prevent diagnosing of the same error multiple times. */
34957 DECL_ATTRIBUTES (fn2)
34958 = tree_cons (get_identifier ("target"),
34959 copy_node (TREE_VALUE (attr1)),
34960 DECL_ATTRIBUTES (fn2));
34962 return false;
34965 target1 = sorted_attr_string (TREE_VALUE (attr1));
34966 target2 = sorted_attr_string (TREE_VALUE (attr2));
34968 /* The sorted target strings must be different for fn1 and fn2
34969 to be versions. */
34970 if (strcmp (target1, target2) == 0)
34971 result = false;
34972 else
34973 result = true;
34975 XDELETEVEC (target1);
34976 XDELETEVEC (target2);
34978 return result;
34981 static tree
34982 ix86_mangle_decl_assembler_name (tree decl, tree id)
34984 /* For function version, add the target suffix to the assembler name. */
34985 if (TREE_CODE (decl) == FUNCTION_DECL
34986 && DECL_FUNCTION_VERSIONED (decl))
34987 id = ix86_mangle_function_version_assembler_name (decl, id);
34988 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
34989 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
34990 #endif
34992 return id;
34995 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
34996 is true, append the full path name of the source file. */
34998 static char *
34999 make_name (tree decl, const char *suffix, bool make_unique)
35001 char *global_var_name;
35002 int name_len;
35003 const char *name;
35004 const char *unique_name = NULL;
35006 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
35008 /* Get a unique name that can be used globally without any chances
35009 of collision at link time. */
35010 if (make_unique)
35011 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
35013 name_len = strlen (name) + strlen (suffix) + 2;
35015 if (make_unique)
35016 name_len += strlen (unique_name) + 1;
35017 global_var_name = XNEWVEC (char, name_len);
35019 /* Use '.' to concatenate names as it is demangler friendly. */
35020 if (make_unique)
35021 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
35022 suffix);
35023 else
35024 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
35026 return global_var_name;
35029 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35031 /* Make a dispatcher declaration for the multi-versioned function DECL.
35032 Calls to DECL function will be replaced with calls to the dispatcher
35033 by the front-end. Return the decl created. */
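/* For a hypothetical function foo, the dispatcher created here is named
   "foo.ifunc" (with a file-unique component added by make_name when foo
   is not TREE_PUBLIC). */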
35035 static tree
35036 make_dispatcher_decl (const tree decl)
35038 tree func_decl;
35039 char *func_name;
35040 tree fn_type, func_type;
35041 bool is_uniq = false;
35043 if (TREE_PUBLIC (decl) == 0)
35044 is_uniq = true;
35046 func_name = make_name (decl, "ifunc", is_uniq);
35048 fn_type = TREE_TYPE (decl);
35049 func_type = build_function_type (TREE_TYPE (fn_type),
35050 TYPE_ARG_TYPES (fn_type));
35052 func_decl = build_fn_decl (func_name, func_type);
35053 XDELETEVEC (func_name);
35054 TREE_USED (func_decl) = 1;
35055 DECL_CONTEXT (func_decl) = NULL_TREE;
35056 DECL_INITIAL (func_decl) = error_mark_node;
35057 DECL_ARTIFICIAL (func_decl) = 1;
35058 /* Mark this func as external, the resolver will flip it again if
35059 it gets generated. */
35060 DECL_EXTERNAL (func_decl) = 1;
35061 /* This will be an IFUNC; IFUNCs have to be externally visible. */
35062 TREE_PUBLIC (func_decl) = 1;
35064 return func_decl;
35067 #endif
35069 /* Returns true if DECL is multi-versioned and is the default function,
35070 that is, it is not tagged with target-specific optimization. */
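/* The default version is the one declared with target ("default"), e.g.
   __attribute__ ((target ("default"))) int foo (void); for a hypothetical
   foo. */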
35072 static bool
35073 is_function_default_version (const tree decl)
35075 if (TREE_CODE (decl) != FUNCTION_DECL
35076 || !DECL_FUNCTION_VERSIONED (decl))
35077 return false;
35078 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35079 gcc_assert (attr);
35080 attr = TREE_VALUE (TREE_VALUE (attr));
35081 return (TREE_CODE (attr) == STRING_CST
35082 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
35085 /* Make a dispatcher declaration for the multi-versioned function DECL.
35086 Calls to DECL function will be replaced with calls to the dispatcher
35087 by the front-end. Returns the decl of the dispatcher function. */
35089 static tree
35090 ix86_get_function_versions_dispatcher (void *decl)
35092 tree fn = (tree) decl;
35093 struct cgraph_node *node = NULL;
35094 struct cgraph_node *default_node = NULL;
35095 struct cgraph_function_version_info *node_v = NULL;
35096 struct cgraph_function_version_info *first_v = NULL;
35098 tree dispatch_decl = NULL;
35100 struct cgraph_function_version_info *default_version_info = NULL;
35102 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
35104 node = cgraph_node::get (fn);
35105 gcc_assert (node != NULL);
35107 node_v = node->function_version ();
35108 gcc_assert (node_v != NULL);
35110 if (node_v->dispatcher_resolver != NULL)
35111 return node_v->dispatcher_resolver;
35113 /* Find the default version and make it the first node. */
35114 first_v = node_v;
35115 /* Go to the beginning of the chain. */
35116 while (first_v->prev != NULL)
35117 first_v = first_v->prev;
35118 default_version_info = first_v;
35119 while (default_version_info != NULL)
35121 if (is_function_default_version
35122 (default_version_info->this_node->decl))
35123 break;
35124 default_version_info = default_version_info->next;
35127 /* If there is no default node, just return NULL. */
35128 if (default_version_info == NULL)
35129 return NULL;
35131 /* Make default info the first node. */
35132 if (first_v != default_version_info)
35134 default_version_info->prev->next = default_version_info->next;
35135 if (default_version_info->next)
35136 default_version_info->next->prev = default_version_info->prev;
35137 first_v->prev = default_version_info;
35138 default_version_info->next = first_v;
35139 default_version_info->prev = NULL;
35142 default_node = default_version_info->this_node;
35144 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35145 if (targetm.has_ifunc_p ())
35147 struct cgraph_function_version_info *it_v = NULL;
35148 struct cgraph_node *dispatcher_node = NULL;
35149 struct cgraph_function_version_info *dispatcher_version_info = NULL;
35151 /* Right now, the dispatching is done via ifunc. */
35152 dispatch_decl = make_dispatcher_decl (default_node->decl);
35154 dispatcher_node = cgraph_node::get_create (dispatch_decl);
35155 gcc_assert (dispatcher_node != NULL);
35156 dispatcher_node->dispatcher_function = 1;
35157 dispatcher_version_info
35158 = dispatcher_node->insert_new_function_version ();
35159 dispatcher_version_info->next = default_version_info;
35160 dispatcher_node->definition = 1;
35162 /* Set the dispatcher for all the versions. */
35163 it_v = default_version_info;
35164 while (it_v != NULL)
35166 it_v->dispatcher_resolver = dispatch_decl;
35167 it_v = it_v->next;
35170 else
35171 #endif
35173 error_at (DECL_SOURCE_LOCATION (default_node->decl),
35174 "multiversioning needs ifunc which is not supported "
35175 "on this target");
35178 return dispatch_decl;
35181 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
35182 it to CHAIN. */
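/* For example, make_attribute ("ifunc", "foo.resolver", NULL_TREE) builds
   the attribute ifunc ("foo.resolver") (the argument name here is
   illustrative); it is used below to tie the dispatcher to its resolver. */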
35184 static tree
35185 make_attribute (const char *name, const char *arg_name, tree chain)
35187 tree attr_name;
35188 tree attr_arg_name;
35189 tree attr_args;
35190 tree attr;
35192 attr_name = get_identifier (name);
35193 attr_arg_name = build_string (strlen (arg_name), arg_name);
35194 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35195 attr = tree_cons (attr_name, attr_args, chain);
35196 return attr;
35199 /* Make the resolver function decl to dispatch the versions of
35200 a multi-versioned function, DEFAULT_DECL. Create an
35201 empty basic block in the resolver and store the pointer in
35202 EMPTY_BB. Return the decl of the resolver function. */
35204 static tree
35205 make_resolver_func (const tree default_decl,
35206 const tree dispatch_decl,
35207 basic_block *empty_bb)
35209 char *resolver_name;
35210 tree decl, type, decl_name, t;
35211 bool is_uniq = false;
35213 /* IFUNCs have to be globally visible. So, if the default_decl is
35214 not, then the name of the IFUNC should be made unique. */
35215 if (TREE_PUBLIC (default_decl) == 0)
35216 is_uniq = true;
35218 /* Append the filename to the resolver function if the versions are
35219 not externally visible. This is because the resolver function has
35220 to be externally visible for the loader to find it. So, appending
35221 the filename will prevent conflicts with a resolver function from
35222 another module which is based on the same version name. */
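/* E.g. the resolver for a non-public (static) foo gets a name of the form
   "foo.<file-unique-suffix>.resolver", while a public foo simply uses
   "foo.resolver". */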
35223 resolver_name = make_name (default_decl, "resolver", is_uniq);
35225 /* The resolver function should return a (void *). */
35226 type = build_function_type_list (ptr_type_node, NULL_TREE);
35228 decl = build_fn_decl (resolver_name, type);
35229 decl_name = get_identifier (resolver_name);
35230 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35232 DECL_NAME (decl) = decl_name;
35233 TREE_USED (decl) = 1;
35234 DECL_ARTIFICIAL (decl) = 1;
35235 DECL_IGNORED_P (decl) = 0;
35236 /* IFUNC resolvers have to be externally visible. */
35237 TREE_PUBLIC (decl) = 1;
35238 DECL_UNINLINABLE (decl) = 1;
35240 /* Resolver is not external, body is generated. */
35241 DECL_EXTERNAL (decl) = 0;
35242 DECL_EXTERNAL (dispatch_decl) = 0;
35244 DECL_CONTEXT (decl) = NULL_TREE;
35245 DECL_INITIAL (decl) = make_node (BLOCK);
35246 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35248 if (DECL_COMDAT_GROUP (default_decl)
35249 || TREE_PUBLIC (default_decl))
35251 /* In this case, each translation unit with a call to this
35252 versioned function will put out a resolver. Ensure it
35253 is comdat to keep just one copy. */
35254 DECL_COMDAT (decl) = 1;
35255 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35257 /* Build result decl and add to function_decl. */
35258 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35259 DECL_ARTIFICIAL (t) = 1;
35260 DECL_IGNORED_P (t) = 1;
35261 DECL_RESULT (decl) = t;
35263 gimplify_function_tree (decl);
35264 push_cfun (DECL_STRUCT_FUNCTION (decl));
35265 *empty_bb = init_lowered_empty_function (decl, false, 0);
35267 cgraph_node::add_new_function (decl, true);
35268 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35270 pop_cfun ();
35272 gcc_assert (dispatch_decl != NULL);
35273 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35274 DECL_ATTRIBUTES (dispatch_decl)
35275 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35277 /* Create the alias for dispatch to resolver here. */
35278 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35279 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35280 XDELETEVEC (resolver_name);
35281 return decl;
35284 /* Generate the dispatching code body to dispatch multi-versioned function
35285 DECL. The target hook is called to process the "target" attributes and
35286 provide the code to dispatch the right function at run-time. NODE points
35287 to the dispatcher decl whose body will be created. */
35289 static tree
35290 ix86_generate_version_dispatcher_body (void *node_p)
35292 tree resolver_decl;
35293 basic_block empty_bb;
35294 tree default_ver_decl;
35295 struct cgraph_node *versn;
35296 struct cgraph_node *node;
35298 struct cgraph_function_version_info *node_version_info = NULL;
35299 struct cgraph_function_version_info *versn_info = NULL;
35301 node = (cgraph_node *)node_p;
35303 node_version_info = node->function_version ();
35304 gcc_assert (node->dispatcher_function
35305 && node_version_info != NULL);
35307 if (node_version_info->dispatcher_resolver)
35308 return node_version_info->dispatcher_resolver;
35310 /* The first version in the chain corresponds to the default version. */
35311 default_ver_decl = node_version_info->next->this_node->decl;
35313 /* node is going to be an alias, so remove the finalized bit. */
35314 node->definition = false;
35316 resolver_decl = make_resolver_func (default_ver_decl,
35317 node->decl, &empty_bb);
35319 node_version_info->dispatcher_resolver = resolver_decl;
35321 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
35323 auto_vec<tree, 2> fn_ver_vec;
35325 for (versn_info = node_version_info->next; versn_info;
35326 versn_info = versn_info->next)
35328 versn = versn_info->this_node;
35329 /* Check for virtual functions here again, as by this time it should
35330 have been determined if this function needs a vtable index or
35331 not. This happens for methods in derived classes that override
35332 virtual methods in base classes but are not explicitly marked as
35333 virtual. */
35334 if (DECL_VINDEX (versn->decl))
35335 sorry ("Virtual function multiversioning not supported");
35337 fn_ver_vec.safe_push (versn->decl);
35340 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35341 cgraph_edge::rebuild_edges ();
35342 pop_cfun ();
35343 return resolver_decl;
35345 /* This builds the processor_model struct type defined in
35346 libgcc/config/i386/cpuinfo.c */
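/* The layout mirrored here is, approximately, the cpuinfo.c definition:

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     }; */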
35348 static tree
35349 build_processor_model_struct (void)
35351 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35352 "__cpu_features"};
35353 tree field = NULL_TREE, field_chain = NULL_TREE;
35354 int i;
35355 tree type = make_node (RECORD_TYPE);
35357 /* The first 3 fields are unsigned int. */
35358 for (i = 0; i < 3; ++i)
35360 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35361 get_identifier (field_name[i]), unsigned_type_node);
35362 if (field_chain != NULL_TREE)
35363 DECL_CHAIN (field) = field_chain;
35364 field_chain = field;
35367 /* The last field is an array of unsigned integers of size one. */
35368 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35369 get_identifier (field_name[3]),
35370 build_array_type (unsigned_type_node,
35371 build_index_type (size_one_node)));
35372 if (field_chain != NULL_TREE)
35373 DECL_CHAIN (field) = field_chain;
35374 field_chain = field;
35376 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35377 return type;
35380 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
35382 static tree
35383 make_var_decl (tree type, const char *name)
35385 tree new_decl;
35387 new_decl = build_decl (UNKNOWN_LOCATION,
35388 VAR_DECL,
35389 get_identifier(name),
35390 type);
35392 DECL_EXTERNAL (new_decl) = 1;
35393 TREE_STATIC (new_decl) = 1;
35394 TREE_PUBLIC (new_decl) = 1;
35395 DECL_INITIAL (new_decl) = 0;
35396 DECL_ARTIFICIAL (new_decl) = 0;
35397 DECL_PRESERVE_P (new_decl) = 1;
35399 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35400 assemble_variable (new_decl, 0, 0, 0);
35402 return new_decl;
35405 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35406 into an integer defined in libgcc/config/i386/cpuinfo.c */
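/* Sketch of the folding, in terms of the __cpu_model variable created
   below:

     __builtin_cpu_is ("amd")        -> __cpu_model.__cpu_vendor == M_AMD
     __builtin_cpu_supports ("avx2") -> __cpu_model.__cpu_features[0]
                                        & (1 << F_AVX2)

   CPU types and subtypes are compared against the next fields after
   subtracting M_CPU_TYPE_START or M_CPU_SUBTYPE_START respectively. */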
35408 static tree
35409 fold_builtin_cpu (tree fndecl, tree *args)
35411 unsigned int i;
35412 enum ix86_builtins fn_code = (enum ix86_builtins)
35413 DECL_FUNCTION_CODE (fndecl);
35414 tree param_string_cst = NULL;
35416 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35417 enum processor_features
35419 F_CMOV = 0,
35420 F_MMX,
35421 F_POPCNT,
35422 F_SSE,
35423 F_SSE2,
35424 F_SSE3,
35425 F_SSSE3,
35426 F_SSE4_1,
35427 F_SSE4_2,
35428 F_AVX,
35429 F_AVX2,
35430 F_SSE4_A,
35431 F_FMA4,
35432 F_XOP,
35433 F_FMA,
35434 F_AVX512F,
35435 F_BMI,
35436 F_BMI2,
35437 F_MAX
35440 /* These are the values for vendor types and cpu types and subtypes
35441 in cpuinfo.c. Cpu types and subtypes should be subtracted by
35442 the corresponding start value. */
35443 enum processor_model
35445 M_INTEL = 1,
35446 M_AMD,
35447 M_CPU_TYPE_START,
35448 M_INTEL_BONNELL,
35449 M_INTEL_CORE2,
35450 M_INTEL_COREI7,
35451 M_AMDFAM10H,
35452 M_AMDFAM15H,
35453 M_INTEL_SILVERMONT,
35454 M_INTEL_KNL,
35455 M_AMD_BTVER1,
35456 M_AMD_BTVER2,
35457 M_CPU_SUBTYPE_START,
35458 M_INTEL_COREI7_NEHALEM,
35459 M_INTEL_COREI7_WESTMERE,
35460 M_INTEL_COREI7_SANDYBRIDGE,
35461 M_AMDFAM10H_BARCELONA,
35462 M_AMDFAM10H_SHANGHAI,
35463 M_AMDFAM10H_ISTANBUL,
35464 M_AMDFAM15H_BDVER1,
35465 M_AMDFAM15H_BDVER2,
35466 M_AMDFAM15H_BDVER3,
35467 M_AMDFAM15H_BDVER4,
35468 M_INTEL_COREI7_IVYBRIDGE,
35469 M_INTEL_COREI7_HASWELL,
35470 M_INTEL_COREI7_BROADWELL
35473 static struct _arch_names_table
35475 const char *const name;
35476 const enum processor_model model;
35478 const arch_names_table[] =
35480 {"amd", M_AMD},
35481 {"intel", M_INTEL},
35482 {"atom", M_INTEL_BONNELL},
35483 {"slm", M_INTEL_SILVERMONT},
35484 {"core2", M_INTEL_CORE2},
35485 {"corei7", M_INTEL_COREI7},
35486 {"nehalem", M_INTEL_COREI7_NEHALEM},
35487 {"westmere", M_INTEL_COREI7_WESTMERE},
35488 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35489 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35490 {"haswell", M_INTEL_COREI7_HASWELL},
35491 {"broadwell", M_INTEL_COREI7_BROADWELL},
35492 {"bonnell", M_INTEL_BONNELL},
35493 {"silvermont", M_INTEL_SILVERMONT},
35494 {"knl", M_INTEL_KNL},
35495 {"amdfam10h", M_AMDFAM10H},
35496 {"barcelona", M_AMDFAM10H_BARCELONA},
35497 {"shanghai", M_AMDFAM10H_SHANGHAI},
35498 {"istanbul", M_AMDFAM10H_ISTANBUL},
35499 {"btver1", M_AMD_BTVER1},
35500 {"amdfam15h", M_AMDFAM15H},
35501 {"bdver1", M_AMDFAM15H_BDVER1},
35502 {"bdver2", M_AMDFAM15H_BDVER2},
35503 {"bdver3", M_AMDFAM15H_BDVER3},
35504 {"bdver4", M_AMDFAM15H_BDVER4},
35505 {"btver2", M_AMD_BTVER2},
35508 static struct _isa_names_table
35510 const char *const name;
35511 const enum processor_features feature;
35513 const isa_names_table[] =
35515 {"cmov", F_CMOV},
35516 {"mmx", F_MMX},
35517 {"popcnt", F_POPCNT},
35518 {"sse", F_SSE},
35519 {"sse2", F_SSE2},
35520 {"sse3", F_SSE3},
35521 {"ssse3", F_SSSE3},
35522 {"sse4a", F_SSE4_A},
35523 {"sse4.1", F_SSE4_1},
35524 {"sse4.2", F_SSE4_2},
35525 {"avx", F_AVX},
35526 {"fma4", F_FMA4},
35527 {"xop", F_XOP},
35528 {"fma", F_FMA},
35529 {"avx2", F_AVX2},
35530 {"avx512f",F_AVX512F},
35531 {"bmi", F_BMI},
35532 {"bmi2", F_BMI2}
35535 tree __processor_model_type = build_processor_model_struct ();
35536 tree __cpu_model_var = make_var_decl (__processor_model_type,
35537 "__cpu_model");
35540 varpool_node::add (__cpu_model_var);
35542 gcc_assert ((args != NULL) && (*args != NULL));
35544 param_string_cst = *args;
35545 while (param_string_cst
35546 && TREE_CODE (param_string_cst) != STRING_CST)
35548 /* *args must be an expr that can contain other EXPRs leading to a
35549 STRING_CST. */
35550 if (!EXPR_P (param_string_cst))
35552 error ("Parameter to builtin must be a string constant or literal");
35553 return integer_zero_node;
35555 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35558 gcc_assert (param_string_cst);
35560 if (fn_code == IX86_BUILTIN_CPU_IS)
35562 tree ref;
35563 tree field;
35564 tree final;
35566 unsigned int field_val = 0;
35567 unsigned int NUM_ARCH_NAMES
35568 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35570 for (i = 0; i < NUM_ARCH_NAMES; i++)
35571 if (strcmp (arch_names_table[i].name,
35572 TREE_STRING_POINTER (param_string_cst)) == 0)
35573 break;
35575 if (i == NUM_ARCH_NAMES)
35577 error ("Parameter to builtin not valid: %s",
35578 TREE_STRING_POINTER (param_string_cst));
35579 return integer_zero_node;
35582 field = TYPE_FIELDS (__processor_model_type);
35583 field_val = arch_names_table[i].model;
35585 /* CPU types are stored in the next field. */
35586 if (field_val > M_CPU_TYPE_START
35587 && field_val < M_CPU_SUBTYPE_START)
35589 field = DECL_CHAIN (field);
35590 field_val -= M_CPU_TYPE_START;
35593 /* CPU subtypes are stored in the next field. */
35594 if (field_val > M_CPU_SUBTYPE_START)
35596 field = DECL_CHAIN (DECL_CHAIN (field));
35597 field_val -= M_CPU_SUBTYPE_START;
35600 /* Get the appropriate field in __cpu_model. */
35601 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35602 field, NULL_TREE);
35604 /* Check the value. */
35605 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35606 build_int_cstu (unsigned_type_node, field_val));
35607 return build1 (CONVERT_EXPR, integer_type_node, final);
35609 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35611 tree ref;
35612 tree array_elt;
35613 tree field;
35614 tree final;
35616 unsigned int field_val = 0;
35617 unsigned int NUM_ISA_NAMES
35618 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35620 for (i = 0; i < NUM_ISA_NAMES; i++)
35621 if (strcmp (isa_names_table[i].name,
35622 TREE_STRING_POINTER (param_string_cst)) == 0)
35623 break;
35625 if (i == NUM_ISA_NAMES)
35627 error ("Parameter to builtin not valid: %s",
35628 TREE_STRING_POINTER (param_string_cst));
35629 return integer_zero_node;
35632 field = TYPE_FIELDS (__processor_model_type);
35633 /* Get the last field, which is __cpu_features. */
35634 while (DECL_CHAIN (field))
35635 field = DECL_CHAIN (field);
35637 /* Get the appropriate field: __cpu_model.__cpu_features */
35638 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35639 field, NULL_TREE);
35641 /* Access the 0th element of __cpu_features array. */
35642 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35643 integer_zero_node, NULL_TREE, NULL_TREE);
35645 field_val = (1 << isa_names_table[i].feature);
35646 /* Return __cpu_model.__cpu_features[0] & field_val */
35647 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35648 build_int_cstu (unsigned_type_node, field_val));
35649 return build1 (CONVERT_EXPR, integer_type_node, final);
35651 gcc_unreachable ();
35654 static tree
35655 ix86_fold_builtin (tree fndecl, int n_args,
35656 tree *args, bool ignore ATTRIBUTE_UNUSED)
35658 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35660 enum ix86_builtins fn_code = (enum ix86_builtins)
35661 DECL_FUNCTION_CODE (fndecl);
35662 if (fn_code == IX86_BUILTIN_CPU_IS
35663 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35665 gcc_assert (n_args == 1);
35666 return fold_builtin_cpu (fndecl, args);
35670 #ifdef SUBTARGET_FOLD_BUILTIN
35671 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35672 #endif
35674 return NULL_TREE;
35677 /* Make builtins to detect cpu type and features supported. NAME is
35678 the builtin name, CODE is the builtin code, and FTYPE is the function
35679 type of the builtin. */
35681 static void
35682 make_cpu_type_builtin (const char* name, int code,
35683 enum ix86_builtin_func_type ftype, bool is_const)
35685 tree decl;
35686 tree type;
35688 type = ix86_get_builtin_func_type (ftype);
35689 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35690 NULL, NULL_TREE);
35691 gcc_assert (decl != NULL_TREE);
35692 ix86_builtins[(int) code] = decl;
35693 TREE_READONLY (decl) = is_const;
35696 /* Make builtins to get CPU type and features supported. The created
35697 builtins are :
35699 __builtin_cpu_init (), to detect cpu type and features,
35700 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35701 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
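/* Illustrative use in user code:

     if (__builtin_cpu_supports ("avx2"))
       use_avx2_path ();
     else
       use_fallback_path ();

   where use_avx2_path and use_fallback_path are hypothetical;
   __builtin_cpu_is ("corei7") similarly tests the detected CPU type. */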
35704 static void
35705 ix86_init_platform_type_builtins (void)
35707 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35708 INT_FTYPE_VOID, false);
35709 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35710 INT_FTYPE_PCCHAR, true);
35711 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35712 INT_FTYPE_PCCHAR, true);
35715 /* Internal method for ix86_init_builtins. */
35717 static void
35718 ix86_init_builtins_va_builtins_abi (void)
35720 tree ms_va_ref, sysv_va_ref;
35721 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35722 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35723 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35724 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35726 if (!TARGET_64BIT)
35727 return;
35728 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35729 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35730 ms_va_ref = build_reference_type (ms_va_list_type_node);
35731 sysv_va_ref =
35732 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35734 fnvoid_va_end_ms =
35735 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35736 fnvoid_va_start_ms =
35737 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35738 fnvoid_va_end_sysv =
35739 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35740 fnvoid_va_start_sysv =
35741 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35742 NULL_TREE);
35743 fnvoid_va_copy_ms =
35744 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35745 NULL_TREE);
35746 fnvoid_va_copy_sysv =
35747 build_function_type_list (void_type_node, sysv_va_ref,
35748 sysv_va_ref, NULL_TREE);
35750 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35751 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35752 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35753 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35754 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35755 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35756 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35757 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35758 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35759 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35760 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35761 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35764 static void
35765 ix86_init_builtin_types (void)
35767 tree float128_type_node, float80_type_node;
35769 /* The __float80 type. */
35770 float80_type_node = long_double_type_node;
35771 if (TYPE_MODE (float80_type_node) != XFmode)
35773 /* The __float80 type. */
35774 float80_type_node = make_node (REAL_TYPE);
35776 TYPE_PRECISION (float80_type_node) = 80;
35777 layout_type (float80_type_node);
35779 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35781 /* The __float128 type. */
35782 float128_type_node = make_node (REAL_TYPE);
35783 TYPE_PRECISION (float128_type_node) = 128;
35784 layout_type (float128_type_node);
35785 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35787 /* This macro is built by i386-builtin-types.awk. */
35788 DEFINE_BUILTIN_PRIMITIVE_TYPES;
35791 static void
35792 ix86_init_builtins (void)
35794 tree t;
35796 ix86_init_builtin_types ();
35798 /* Builtins to get CPU type and features. */
35799 ix86_init_platform_type_builtins ();
35801 /* TFmode support builtins. */
35802 def_builtin_const (0, "__builtin_infq",
35803 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35804 def_builtin_const (0, "__builtin_huge_valq",
35805 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35807 /* We will expand them to normal call if SSE isn't available since
35808 they are used by libgcc. */
35809 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35810 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35811 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35812 TREE_READONLY (t) = 1;
35813 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35815 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35816 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35817 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35818 TREE_READONLY (t) = 1;
35819 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35821 ix86_init_tm_builtins ();
35822 ix86_init_mmx_sse_builtins ();
35823 ix86_init_mpx_builtins ();
35825 if (TARGET_LP64)
35826 ix86_init_builtins_va_builtins_abi ();
35828 #ifdef SUBTARGET_INIT_BUILTINS
35829 SUBTARGET_INIT_BUILTINS;
35830 #endif
35833 /* Return the ix86 builtin for CODE. */
35835 static tree
35836 ix86_builtin_decl (unsigned code, bool)
35838 if (code >= IX86_BUILTIN_MAX)
35839 return error_mark_node;
35841 return ix86_builtins[code];
35844 /* Errors in the source file can cause expand_expr to return const0_rtx
35845 where we expect a vector. To avoid crashing, use one of the vector
35846 clear instructions. */
35847 static rtx
35848 safe_vector_operand (rtx x, machine_mode mode)
35850 if (x == const0_rtx)
35851 x = CONST0_RTX (mode);
35852 return x;
35855 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
35857 static rtx
35858 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35860 rtx pat;
35861 tree arg0 = CALL_EXPR_ARG (exp, 0);
35862 tree arg1 = CALL_EXPR_ARG (exp, 1);
35863 rtx op0 = expand_normal (arg0);
35864 rtx op1 = expand_normal (arg1);
35865 machine_mode tmode = insn_data[icode].operand[0].mode;
35866 machine_mode mode0 = insn_data[icode].operand[1].mode;
35867 machine_mode mode1 = insn_data[icode].operand[2].mode;
35869 if (VECTOR_MODE_P (mode0))
35870 op0 = safe_vector_operand (op0, mode0);
35871 if (VECTOR_MODE_P (mode1))
35872 op1 = safe_vector_operand (op1, mode1);
35874 if (optimize || !target
35875 || GET_MODE (target) != tmode
35876 || !insn_data[icode].operand[0].predicate (target, tmode))
35877 target = gen_reg_rtx (tmode);
35879 if (GET_MODE (op1) == SImode && mode1 == TImode)
35881 rtx x = gen_reg_rtx (V4SImode);
35882 emit_insn (gen_sse2_loadd (x, op1));
35883 op1 = gen_lowpart (TImode, x);
35886 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35887 op0 = copy_to_mode_reg (mode0, op0);
35888 if (!insn_data[icode].operand[2].predicate (op1, mode1))
35889 op1 = copy_to_mode_reg (mode1, op1);
35891 pat = GEN_FCN (icode) (target, op0, op1);
35892 if (! pat)
35893 return 0;
35895 emit_insn (pat);
35897 return target;
35900 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
35902 static rtx
35903 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
35904 enum ix86_builtin_func_type m_type,
35905 enum rtx_code sub_code)
35907 rtx pat;
35908 int i;
35909 int nargs;
35910 bool comparison_p = false;
35911 bool tf_p = false;
35912 bool last_arg_constant = false;
35913 int num_memory = 0;
35914 struct {
35915 rtx op;
35916 machine_mode mode;
35917 } args[4];
35919 machine_mode tmode = insn_data[icode].operand[0].mode;
35921 switch (m_type)
35923 case MULTI_ARG_4_DF2_DI_I:
35924 case MULTI_ARG_4_DF2_DI_I1:
35925 case MULTI_ARG_4_SF2_SI_I:
35926 case MULTI_ARG_4_SF2_SI_I1:
35927 nargs = 4;
35928 last_arg_constant = true;
35929 break;
35931 case MULTI_ARG_3_SF:
35932 case MULTI_ARG_3_DF:
35933 case MULTI_ARG_3_SF2:
35934 case MULTI_ARG_3_DF2:
35935 case MULTI_ARG_3_DI:
35936 case MULTI_ARG_3_SI:
35937 case MULTI_ARG_3_SI_DI:
35938 case MULTI_ARG_3_HI:
35939 case MULTI_ARG_3_HI_SI:
35940 case MULTI_ARG_3_QI:
35941 case MULTI_ARG_3_DI2:
35942 case MULTI_ARG_3_SI2:
35943 case MULTI_ARG_3_HI2:
35944 case MULTI_ARG_3_QI2:
35945 nargs = 3;
35946 break;
35948 case MULTI_ARG_2_SF:
35949 case MULTI_ARG_2_DF:
35950 case MULTI_ARG_2_DI:
35951 case MULTI_ARG_2_SI:
35952 case MULTI_ARG_2_HI:
35953 case MULTI_ARG_2_QI:
35954 nargs = 2;
35955 break;
35957 case MULTI_ARG_2_DI_IMM:
35958 case MULTI_ARG_2_SI_IMM:
35959 case MULTI_ARG_2_HI_IMM:
35960 case MULTI_ARG_2_QI_IMM:
35961 nargs = 2;
35962 last_arg_constant = true;
35963 break;
35965 case MULTI_ARG_1_SF:
35966 case MULTI_ARG_1_DF:
35967 case MULTI_ARG_1_SF2:
35968 case MULTI_ARG_1_DF2:
35969 case MULTI_ARG_1_DI:
35970 case MULTI_ARG_1_SI:
35971 case MULTI_ARG_1_HI:
35972 case MULTI_ARG_1_QI:
35973 case MULTI_ARG_1_SI_DI:
35974 case MULTI_ARG_1_HI_DI:
35975 case MULTI_ARG_1_HI_SI:
35976 case MULTI_ARG_1_QI_DI:
35977 case MULTI_ARG_1_QI_SI:
35978 case MULTI_ARG_1_QI_HI:
35979 nargs = 1;
35980 break;
35982 case MULTI_ARG_2_DI_CMP:
35983 case MULTI_ARG_2_SI_CMP:
35984 case MULTI_ARG_2_HI_CMP:
35985 case MULTI_ARG_2_QI_CMP:
35986 nargs = 2;
35987 comparison_p = true;
35988 break;
35990 case MULTI_ARG_2_SF_TF:
35991 case MULTI_ARG_2_DF_TF:
35992 case MULTI_ARG_2_DI_TF:
35993 case MULTI_ARG_2_SI_TF:
35994 case MULTI_ARG_2_HI_TF:
35995 case MULTI_ARG_2_QI_TF:
35996 nargs = 2;
35997 tf_p = true;
35998 break;
36000 default:
36001 gcc_unreachable ();
36004 if (optimize || !target
36005 || GET_MODE (target) != tmode
36006 || !insn_data[icode].operand[0].predicate (target, tmode))
36007 target = gen_reg_rtx (tmode);
36009 gcc_assert (nargs <= 4);
36011 for (i = 0; i < nargs; i++)
36013 tree arg = CALL_EXPR_ARG (exp, i);
36014 rtx op = expand_normal (arg);
36015 int adjust = (comparison_p) ? 1 : 0;
36016 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
36018 if (last_arg_constant && i == nargs - 1)
36020 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
36022 enum insn_code new_icode = icode;
36023 switch (icode)
36025 case CODE_FOR_xop_vpermil2v2df3:
36026 case CODE_FOR_xop_vpermil2v4sf3:
36027 case CODE_FOR_xop_vpermil2v4df3:
36028 case CODE_FOR_xop_vpermil2v8sf3:
36029 error ("the last argument must be a 2-bit immediate");
36030 return gen_reg_rtx (tmode);
36031 case CODE_FOR_xop_rotlv2di3:
36032 new_icode = CODE_FOR_rotlv2di3;
36033 goto xop_rotl;
36034 case CODE_FOR_xop_rotlv4si3:
36035 new_icode = CODE_FOR_rotlv4si3;
36036 goto xop_rotl;
36037 case CODE_FOR_xop_rotlv8hi3:
36038 new_icode = CODE_FOR_rotlv8hi3;
36039 goto xop_rotl;
36040 case CODE_FOR_xop_rotlv16qi3:
36041 new_icode = CODE_FOR_rotlv16qi3;
36042 xop_rotl:
36043 if (CONST_INT_P (op))
36045 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
36046 op = GEN_INT (INTVAL (op) & mask);
36047 gcc_checking_assert
36048 (insn_data[icode].operand[i + 1].predicate (op, mode));
36050 else
36052 gcc_checking_assert
36053 (nargs == 2
36054 && insn_data[new_icode].operand[0].mode == tmode
36055 && insn_data[new_icode].operand[1].mode == tmode
36056 && insn_data[new_icode].operand[2].mode == mode
36057 && insn_data[new_icode].operand[0].predicate
36058 == insn_data[icode].operand[0].predicate
36059 && insn_data[new_icode].operand[1].predicate
36060 == insn_data[icode].operand[1].predicate);
36061 icode = new_icode;
36062 goto non_constant;
36064 break;
36065 default:
36066 gcc_unreachable ();
36070 else
36072 non_constant:
36073 if (VECTOR_MODE_P (mode))
36074 op = safe_vector_operand (op, mode);
36076 /* If we aren't optimizing, only allow one memory operand to be
36077 generated. */
36078 if (memory_operand (op, mode))
36079 num_memory++;
36081 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
36083 if (optimize
36084 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
36085 || num_memory > 1)
36086 op = force_reg (mode, op);
36089 args[i].op = op;
36090 args[i].mode = mode;
36093 switch (nargs)
36095 case 1:
36096 pat = GEN_FCN (icode) (target, args[0].op);
36097 break;
36099 case 2:
36100 if (tf_p)
36101 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
36102 GEN_INT ((int)sub_code));
36103 else if (! comparison_p)
36104 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36105 else
36107 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
36108 args[0].op,
36109 args[1].op);
36111 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
36113 break;
36115 case 3:
36116 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36117 break;
36119 case 4:
36120 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
36121 break;
36123 default:
36124 gcc_unreachable ();
36127 if (! pat)
36128 return 0;
36130 emit_insn (pat);
36131 return target;
36134 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
36135 insns with vec_merge. */
36137 static rtx
36138 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
36139 rtx target)
36141 rtx pat;
36142 tree arg0 = CALL_EXPR_ARG (exp, 0);
36143 rtx op1, op0 = expand_normal (arg0);
36144 machine_mode tmode = insn_data[icode].operand[0].mode;
36145 machine_mode mode0 = insn_data[icode].operand[1].mode;
36147 if (optimize || !target
36148 || GET_MODE (target) != tmode
36149 || !insn_data[icode].operand[0].predicate (target, tmode))
36150 target = gen_reg_rtx (tmode);
36152 if (VECTOR_MODE_P (mode0))
36153 op0 = safe_vector_operand (op0, mode0);
36155 if ((optimize && !register_operand (op0, mode0))
36156 || !insn_data[icode].operand[1].predicate (op0, mode0))
36157 op0 = copy_to_mode_reg (mode0, op0);
36159 op1 = op0;
36160 if (!insn_data[icode].operand[2].predicate (op1, mode0))
36161 op1 = copy_to_mode_reg (mode0, op1);
36163 pat = GEN_FCN (icode) (target, op0, op1);
36164 if (! pat)
36165 return 0;
36166 emit_insn (pat);
36167 return target;
36170 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
36172 static rtx
36173 ix86_expand_sse_compare (const struct builtin_description *d,
36174 tree exp, rtx target, bool swap)
36176 rtx pat;
36177 tree arg0 = CALL_EXPR_ARG (exp, 0);
36178 tree arg1 = CALL_EXPR_ARG (exp, 1);
36179 rtx op0 = expand_normal (arg0);
36180 rtx op1 = expand_normal (arg1);
36181 rtx op2;
36182 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36183 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36184 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36185 enum rtx_code comparison = d->comparison;
36187 if (VECTOR_MODE_P (mode0))
36188 op0 = safe_vector_operand (op0, mode0);
36189 if (VECTOR_MODE_P (mode1))
36190 op1 = safe_vector_operand (op1, mode1);
36192 /* Swap operands if we have a comparison that isn't available in
36193 hardware. */
36194 if (swap)
36195 std::swap (op0, op1);
36197 if (optimize || !target
36198 || GET_MODE (target) != tmode
36199 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36200 target = gen_reg_rtx (tmode);
36202 if ((optimize && !register_operand (op0, mode0))
36203 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36204 op0 = copy_to_mode_reg (mode0, op0);
36205 if ((optimize && !register_operand (op1, mode1))
36206 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36207 op1 = copy_to_mode_reg (mode1, op1);
36209 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36210 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36211 if (! pat)
36212 return 0;
36213 emit_insn (pat);
36214 return target;
36217 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
36219 static rtx
36220 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36221 rtx target)
36223 rtx pat;
36224 tree arg0 = CALL_EXPR_ARG (exp, 0);
36225 tree arg1 = CALL_EXPR_ARG (exp, 1);
36226 rtx op0 = expand_normal (arg0);
36227 rtx op1 = expand_normal (arg1);
36228 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36229 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36230 enum rtx_code comparison = d->comparison;
36232 if (VECTOR_MODE_P (mode0))
36233 op0 = safe_vector_operand (op0, mode0);
36234 if (VECTOR_MODE_P (mode1))
36235 op1 = safe_vector_operand (op1, mode1);
36237 /* Swap operands if we have a comparison that isn't available in
36238 hardware. */
36239 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36240 std::swap (op0, op1);
36242 target = gen_reg_rtx (SImode);
36243 emit_move_insn (target, const0_rtx);
36244 target = gen_rtx_SUBREG (QImode, target, 0);
36246 if ((optimize && !register_operand (op0, mode0))
36247 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36248 op0 = copy_to_mode_reg (mode0, op0);
36249 if ((optimize && !register_operand (op1, mode1))
36250 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36251 op1 = copy_to_mode_reg (mode1, op1);
36253 pat = GEN_FCN (d->icode) (op0, op1);
36254 if (! pat)
36255 return 0;
36256 emit_insn (pat);
36257 emit_insn (gen_rtx_SET (VOIDmode,
36258 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36259 gen_rtx_fmt_ee (comparison, QImode,
36260 SET_DEST (pat),
36261 const0_rtx)));
36263 return SUBREG_REG (target);
36266 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
36268 static rtx
36269 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36270 rtx target)
36272 rtx pat;
36273 tree arg0 = CALL_EXPR_ARG (exp, 0);
36274 rtx op1, op0 = expand_normal (arg0);
36275 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36276 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36278 if (optimize || target == 0
36279 || GET_MODE (target) != tmode
36280 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36281 target = gen_reg_rtx (tmode);
36283 if (VECTOR_MODE_P (mode0))
36284 op0 = safe_vector_operand (op0, mode0);
36286 if ((optimize && !register_operand (op0, mode0))
36287 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36288 op0 = copy_to_mode_reg (mode0, op0);
36290 op1 = GEN_INT (d->comparison);
36292 pat = GEN_FCN (d->icode) (target, op0, op1);
36293 if (! pat)
36294 return 0;
36295 emit_insn (pat);
36296 return target;
36299 static rtx
36300 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36301 tree exp, rtx target)
36303 rtx pat;
36304 tree arg0 = CALL_EXPR_ARG (exp, 0);
36305 tree arg1 = CALL_EXPR_ARG (exp, 1);
36306 rtx op0 = expand_normal (arg0);
36307 rtx op1 = expand_normal (arg1);
36308 rtx op2;
36309 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36310 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36311 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36313 if (optimize || target == 0
36314 || GET_MODE (target) != tmode
36315 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36316 target = gen_reg_rtx (tmode);
36318 op0 = safe_vector_operand (op0, mode0);
36319 op1 = safe_vector_operand (op1, mode1);
36321 if ((optimize && !register_operand (op0, mode0))
36322 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36323 op0 = copy_to_mode_reg (mode0, op0);
36324 if ((optimize && !register_operand (op1, mode1))
36325 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36326 op1 = copy_to_mode_reg (mode1, op1);
36328 op2 = GEN_INT (d->comparison);
36330 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36331 if (! pat)
36332 return 0;
36333 emit_insn (pat);
36334 return target;
36337 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
36339 static rtx
36340 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36341 rtx target)
36343 rtx pat;
36344 tree arg0 = CALL_EXPR_ARG (exp, 0);
36345 tree arg1 = CALL_EXPR_ARG (exp, 1);
36346 rtx op0 = expand_normal (arg0);
36347 rtx op1 = expand_normal (arg1);
36348 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36349 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36350 enum rtx_code comparison = d->comparison;
36352 if (VECTOR_MODE_P (mode0))
36353 op0 = safe_vector_operand (op0, mode0);
36354 if (VECTOR_MODE_P (mode1))
36355 op1 = safe_vector_operand (op1, mode1);
36357 target = gen_reg_rtx (SImode);
36358 emit_move_insn (target, const0_rtx);
36359 target = gen_rtx_SUBREG (QImode, target, 0);
36361 if ((optimize && !register_operand (op0, mode0))
36362 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36363 op0 = copy_to_mode_reg (mode0, op0);
36364 if ((optimize && !register_operand (op1, mode1))
36365 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36366 op1 = copy_to_mode_reg (mode1, op1);
36368 pat = GEN_FCN (d->icode) (op0, op1);
36369 if (! pat)
36370 return 0;
36371 emit_insn (pat);
36372 emit_insn (gen_rtx_SET (VOIDmode,
36373 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36374 gen_rtx_fmt_ee (comparison, QImode,
36375 SET_DEST (pat),
36376 const0_rtx)));
36378 return SUBREG_REG (target);
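/* Illustrative sketch, not part of this file: a user-level entry into the
   ptest expander above.  _mm_testz_si128 is the real SSE4.1 intrinsic from
   smmintrin.h; its builtin carries one of the ..._PTEST function types, so
   the int result is produced by the same flag-to-QImode materialization as
   at the end of ix86_expand_sse_ptest.  */
#include <smmintrin.h>

static int
vector_is_all_zero (__m128i x)
{
  /* ptest sets ZF when (x & x) is zero; the expander turns that flag
     into the returned int.  */
  return _mm_testz_si128 (x, x);
}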
36381 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
36383 static rtx
36384 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36385 tree exp, rtx target)
36387 rtx pat;
36388 tree arg0 = CALL_EXPR_ARG (exp, 0);
36389 tree arg1 = CALL_EXPR_ARG (exp, 1);
36390 tree arg2 = CALL_EXPR_ARG (exp, 2);
36391 tree arg3 = CALL_EXPR_ARG (exp, 3);
36392 tree arg4 = CALL_EXPR_ARG (exp, 4);
36393 rtx scratch0, scratch1;
36394 rtx op0 = expand_normal (arg0);
36395 rtx op1 = expand_normal (arg1);
36396 rtx op2 = expand_normal (arg2);
36397 rtx op3 = expand_normal (arg3);
36398 rtx op4 = expand_normal (arg4);
36399 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36401 tmode0 = insn_data[d->icode].operand[0].mode;
36402 tmode1 = insn_data[d->icode].operand[1].mode;
36403 modev2 = insn_data[d->icode].operand[2].mode;
36404 modei3 = insn_data[d->icode].operand[3].mode;
36405 modev4 = insn_data[d->icode].operand[4].mode;
36406 modei5 = insn_data[d->icode].operand[5].mode;
36407 modeimm = insn_data[d->icode].operand[6].mode;
36409 if (VECTOR_MODE_P (modev2))
36410 op0 = safe_vector_operand (op0, modev2);
36411 if (VECTOR_MODE_P (modev4))
36412 op2 = safe_vector_operand (op2, modev4);
36414 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36415 op0 = copy_to_mode_reg (modev2, op0);
36416 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36417 op1 = copy_to_mode_reg (modei3, op1);
36418 if ((optimize && !register_operand (op2, modev4))
36419 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36420 op2 = copy_to_mode_reg (modev4, op2);
36421 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36422 op3 = copy_to_mode_reg (modei5, op3);
36424 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36426 error ("the fifth argument must be an 8-bit immediate");
36427 return const0_rtx;
36430 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36432 if (optimize || !target
36433 || GET_MODE (target) != tmode0
36434 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36435 target = gen_reg_rtx (tmode0);
36437 scratch1 = gen_reg_rtx (tmode1);
36439 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36441 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36443 if (optimize || !target
36444 || GET_MODE (target) != tmode1
36445 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36446 target = gen_reg_rtx (tmode1);
36448 scratch0 = gen_reg_rtx (tmode0);
36450 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36452 else
36454 gcc_assert (d->flag);
36456 scratch0 = gen_reg_rtx (tmode0);
36457 scratch1 = gen_reg_rtx (tmode1);
36459 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36462 if (! pat)
36463 return 0;
36465 emit_insn (pat);
36467 if (d->flag)
36469 target = gen_reg_rtx (SImode);
36470 emit_move_insn (target, const0_rtx);
36471 target = gen_rtx_SUBREG (QImode, target, 0);
36473 emit_insn
36474 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36475 gen_rtx_fmt_ee (EQ, QImode,
36476 gen_rtx_REG ((machine_mode) d->flag,
36477 FLAGS_REG),
36478 const0_rtx)));
36479 return SUBREG_REG (target);
36481 else
36482 return target;
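/* Illustrative sketch, not part of this file: the explicit-length string
   comparison intrinsic handled by ix86_expand_sse_pcmpestr.  _mm_cmpestri
   is the real SSE4.2 intrinsic from nmmintrin.h; its fifth argument is the
   control byte checked by operand[6].predicate above, which is why the
   expander insists on an 8-bit immediate there.  */
#include <nmmintrin.h>

static int
cmpestri_example (__m128i a, int len_a, __m128i b, int len_b)
{
  /* The control mode must be a compile-time 8-bit constant.  */
  return _mm_cmpestri (a, len_a, b, len_b,
		       _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);
}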
36486 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
36488 static rtx
36489 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36490 tree exp, rtx target)
36492 rtx pat;
36493 tree arg0 = CALL_EXPR_ARG (exp, 0);
36494 tree arg1 = CALL_EXPR_ARG (exp, 1);
36495 tree arg2 = CALL_EXPR_ARG (exp, 2);
36496 rtx scratch0, scratch1;
36497 rtx op0 = expand_normal (arg0);
36498 rtx op1 = expand_normal (arg1);
36499 rtx op2 = expand_normal (arg2);
36500 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36502 tmode0 = insn_data[d->icode].operand[0].mode;
36503 tmode1 = insn_data[d->icode].operand[1].mode;
36504 modev2 = insn_data[d->icode].operand[2].mode;
36505 modev3 = insn_data[d->icode].operand[3].mode;
36506 modeimm = insn_data[d->icode].operand[4].mode;
36508 if (VECTOR_MODE_P (modev2))
36509 op0 = safe_vector_operand (op0, modev2);
36510 if (VECTOR_MODE_P (modev3))
36511 op1 = safe_vector_operand (op1, modev3);
36513 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36514 op0 = copy_to_mode_reg (modev2, op0);
36515 if ((optimize && !register_operand (op1, modev3))
36516 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36517 op1 = copy_to_mode_reg (modev3, op1);
36519 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36521 error ("the third argument must be an 8-bit immediate");
36522 return const0_rtx;
36525 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36527 if (optimize || !target
36528 || GET_MODE (target) != tmode0
36529 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36530 target = gen_reg_rtx (tmode0);
36532 scratch1 = gen_reg_rtx (tmode1);
36534 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36536 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36538 if (optimize || !target
36539 || GET_MODE (target) != tmode1
36540 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36541 target = gen_reg_rtx (tmode1);
36543 scratch0 = gen_reg_rtx (tmode0);
36545 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36547 else
36549 gcc_assert (d->flag);
36551 scratch0 = gen_reg_rtx (tmode0);
36552 scratch1 = gen_reg_rtx (tmode1);
36554 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36557 if (! pat)
36558 return 0;
36560 emit_insn (pat);
36562 if (d->flag)
36564 target = gen_reg_rtx (SImode);
36565 emit_move_insn (target, const0_rtx);
36566 target = gen_rtx_SUBREG (QImode, target, 0);
36568 emit_insn
36569 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36570 gen_rtx_fmt_ee (EQ, QImode,
36571 gen_rtx_REG ((machine_mode) d->flag,
36572 FLAGS_REG),
36573 const0_rtx)));
36574 return SUBREG_REG (target);
36576 else
36577 return target;
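/* Illustrative sketch, not part of this file: the implicit-length
   counterpart handled by ix86_expand_sse_pcmpistr.  _mm_cmpistri is the
   real SSE4.2 intrinsic; its third argument is the control byte checked by
   operand[4].predicate above, hence the "third argument must be an 8-bit
   immediate" diagnostic.  */
#include <nmmintrin.h>

static int
cmpistri_example (__m128i a, __m128i b)
{
  /* The control mode must be a compile-time 8-bit constant.  */
  return _mm_cmpistri (a, b, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH);
}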
36580 /* Subroutine of ix86_expand_builtin to take care of insns with
36581 variable number of operands. */
36583 static rtx
36584 ix86_expand_args_builtin (const struct builtin_description *d,
36585 tree exp, rtx target)
36587 rtx pat, real_target;
36588 unsigned int i, nargs;
36589 unsigned int nargs_constant = 0;
36590 unsigned int mask_pos = 0;
36591 int num_memory = 0;
36592 struct
36594 rtx op;
36595 machine_mode mode;
36596 } args[6];
36597 bool last_arg_count = false;
36598 enum insn_code icode = d->icode;
36599 const struct insn_data_d *insn_p = &insn_data[icode];
36600 machine_mode tmode = insn_p->operand[0].mode;
36601 machine_mode rmode = VOIDmode;
36602 bool swap = false;
36603 enum rtx_code comparison = d->comparison;
36605 switch ((enum ix86_builtin_func_type) d->flag)
36607 case V2DF_FTYPE_V2DF_ROUND:
36608 case V4DF_FTYPE_V4DF_ROUND:
36609 case V4SF_FTYPE_V4SF_ROUND:
36610 case V8SF_FTYPE_V8SF_ROUND:
36611 case V4SI_FTYPE_V4SF_ROUND:
36612 case V8SI_FTYPE_V8SF_ROUND:
36613 return ix86_expand_sse_round (d, exp, target);
36614 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36615 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36616 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36617 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36618 case INT_FTYPE_V8SF_V8SF_PTEST:
36619 case INT_FTYPE_V4DI_V4DI_PTEST:
36620 case INT_FTYPE_V4DF_V4DF_PTEST:
36621 case INT_FTYPE_V4SF_V4SF_PTEST:
36622 case INT_FTYPE_V2DI_V2DI_PTEST:
36623 case INT_FTYPE_V2DF_V2DF_PTEST:
36624 return ix86_expand_sse_ptest (d, exp, target);
36625 case FLOAT128_FTYPE_FLOAT128:
36626 case FLOAT_FTYPE_FLOAT:
36627 case INT_FTYPE_INT:
36628 case UINT64_FTYPE_INT:
36629 case UINT16_FTYPE_UINT16:
36630 case INT64_FTYPE_INT64:
36631 case INT64_FTYPE_V4SF:
36632 case INT64_FTYPE_V2DF:
36633 case INT_FTYPE_V16QI:
36634 case INT_FTYPE_V8QI:
36635 case INT_FTYPE_V8SF:
36636 case INT_FTYPE_V4DF:
36637 case INT_FTYPE_V4SF:
36638 case INT_FTYPE_V2DF:
36639 case INT_FTYPE_V32QI:
36640 case V16QI_FTYPE_V16QI:
36641 case V8SI_FTYPE_V8SF:
36642 case V8SI_FTYPE_V4SI:
36643 case V8HI_FTYPE_V8HI:
36644 case V8HI_FTYPE_V16QI:
36645 case V8QI_FTYPE_V8QI:
36646 case V8SF_FTYPE_V8SF:
36647 case V8SF_FTYPE_V8SI:
36648 case V8SF_FTYPE_V4SF:
36649 case V8SF_FTYPE_V8HI:
36650 case V4SI_FTYPE_V4SI:
36651 case V4SI_FTYPE_V16QI:
36652 case V4SI_FTYPE_V4SF:
36653 case V4SI_FTYPE_V8SI:
36654 case V4SI_FTYPE_V8HI:
36655 case V4SI_FTYPE_V4DF:
36656 case V4SI_FTYPE_V2DF:
36657 case V4HI_FTYPE_V4HI:
36658 case V4DF_FTYPE_V4DF:
36659 case V4DF_FTYPE_V4SI:
36660 case V4DF_FTYPE_V4SF:
36661 case V4DF_FTYPE_V2DF:
36662 case V4SF_FTYPE_V4SF:
36663 case V4SF_FTYPE_V4SI:
36664 case V4SF_FTYPE_V8SF:
36665 case V4SF_FTYPE_V4DF:
36666 case V4SF_FTYPE_V8HI:
36667 case V4SF_FTYPE_V2DF:
36668 case V2DI_FTYPE_V2DI:
36669 case V2DI_FTYPE_V16QI:
36670 case V2DI_FTYPE_V8HI:
36671 case V2DI_FTYPE_V4SI:
36672 case V2DF_FTYPE_V2DF:
36673 case V2DF_FTYPE_V4SI:
36674 case V2DF_FTYPE_V4DF:
36675 case V2DF_FTYPE_V4SF:
36676 case V2DF_FTYPE_V2SI:
36677 case V2SI_FTYPE_V2SI:
36678 case V2SI_FTYPE_V4SF:
36679 case V2SI_FTYPE_V2SF:
36680 case V2SI_FTYPE_V2DF:
36681 case V2SF_FTYPE_V2SF:
36682 case V2SF_FTYPE_V2SI:
36683 case V32QI_FTYPE_V32QI:
36684 case V32QI_FTYPE_V16QI:
36685 case V16HI_FTYPE_V16HI:
36686 case V16HI_FTYPE_V8HI:
36687 case V8SI_FTYPE_V8SI:
36688 case V16HI_FTYPE_V16QI:
36689 case V8SI_FTYPE_V16QI:
36690 case V4DI_FTYPE_V16QI:
36691 case V8SI_FTYPE_V8HI:
36692 case V4DI_FTYPE_V8HI:
36693 case V4DI_FTYPE_V4SI:
36694 case V4DI_FTYPE_V2DI:
36695 case HI_FTYPE_HI:
36696 case HI_FTYPE_V16QI:
36697 case SI_FTYPE_V32QI:
36698 case DI_FTYPE_V64QI:
36699 case V16QI_FTYPE_HI:
36700 case V32QI_FTYPE_SI:
36701 case V64QI_FTYPE_DI:
36702 case V8HI_FTYPE_QI:
36703 case V16HI_FTYPE_HI:
36704 case V32HI_FTYPE_SI:
36705 case V4SI_FTYPE_QI:
36706 case V8SI_FTYPE_QI:
36707 case V4SI_FTYPE_HI:
36708 case V8SI_FTYPE_HI:
36709 case QI_FTYPE_V8HI:
36710 case HI_FTYPE_V16HI:
36711 case SI_FTYPE_V32HI:
36712 case QI_FTYPE_V4SI:
36713 case QI_FTYPE_V8SI:
36714 case HI_FTYPE_V16SI:
36715 case QI_FTYPE_V2DI:
36716 case QI_FTYPE_V4DI:
36717 case QI_FTYPE_V8DI:
36718 case UINT_FTYPE_V2DF:
36719 case UINT_FTYPE_V4SF:
36720 case UINT64_FTYPE_V2DF:
36721 case UINT64_FTYPE_V4SF:
36722 case V16QI_FTYPE_V8DI:
36723 case V16HI_FTYPE_V16SI:
36724 case V16SI_FTYPE_HI:
36725 case V2DI_FTYPE_QI:
36726 case V4DI_FTYPE_QI:
36727 case V16SI_FTYPE_V16SI:
36728 case V16SI_FTYPE_INT:
36729 case V16SF_FTYPE_FLOAT:
36730 case V16SF_FTYPE_V8SF:
36731 case V16SI_FTYPE_V8SI:
36732 case V16SF_FTYPE_V4SF:
36733 case V16SI_FTYPE_V4SI:
36734 case V16SF_FTYPE_V16SF:
36735 case V8HI_FTYPE_V8DI:
36736 case V8UHI_FTYPE_V8UHI:
36737 case V8SI_FTYPE_V8DI:
36738 case V8SF_FTYPE_V8DF:
36739 case V8DI_FTYPE_QI:
36740 case V8DI_FTYPE_INT64:
36741 case V8DI_FTYPE_V4DI:
36742 case V8DI_FTYPE_V8DI:
36743 case V8DF_FTYPE_DOUBLE:
36744 case V8DF_FTYPE_V4DF:
36745 case V8DF_FTYPE_V2DF:
36746 case V8DF_FTYPE_V8DF:
36747 case V8DF_FTYPE_V8SI:
36748 nargs = 1;
36749 break;
36750 case V4SF_FTYPE_V4SF_VEC_MERGE:
36751 case V2DF_FTYPE_V2DF_VEC_MERGE:
36752 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36753 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36754 case V16QI_FTYPE_V16QI_V16QI:
36755 case V16QI_FTYPE_V8HI_V8HI:
36756 case V16SI_FTYPE_V16SI_V16SI:
36757 case V16SF_FTYPE_V16SF_V16SF:
36758 case V16SF_FTYPE_V16SF_V16SI:
36759 case V8QI_FTYPE_V8QI_V8QI:
36760 case V8QI_FTYPE_V4HI_V4HI:
36761 case V8HI_FTYPE_V8HI_V8HI:
36762 case V8HI_FTYPE_V16QI_V16QI:
36763 case V8HI_FTYPE_V4SI_V4SI:
36764 case V8SF_FTYPE_V8SF_V8SF:
36765 case V8SF_FTYPE_V8SF_V8SI:
36766 case V8DI_FTYPE_V8DI_V8DI:
36767 case V8DF_FTYPE_V8DF_V8DF:
36768 case V8DF_FTYPE_V8DF_V8DI:
36769 case V4SI_FTYPE_V4SI_V4SI:
36770 case V4SI_FTYPE_V8HI_V8HI:
36771 case V4SI_FTYPE_V4SF_V4SF:
36772 case V4SI_FTYPE_V2DF_V2DF:
36773 case V4HI_FTYPE_V4HI_V4HI:
36774 case V4HI_FTYPE_V8QI_V8QI:
36775 case V4HI_FTYPE_V2SI_V2SI:
36776 case V4DF_FTYPE_V4DF_V4DF:
36777 case V4DF_FTYPE_V4DF_V4DI:
36778 case V4SF_FTYPE_V4SF_V4SF:
36779 case V4SF_FTYPE_V4SF_V4SI:
36780 case V4SF_FTYPE_V4SF_V2SI:
36781 case V4SF_FTYPE_V4SF_V2DF:
36782 case V4SF_FTYPE_V4SF_UINT:
36783 case V4SF_FTYPE_V4SF_UINT64:
36784 case V4SF_FTYPE_V4SF_DI:
36785 case V4SF_FTYPE_V4SF_SI:
36786 case V2DI_FTYPE_V2DI_V2DI:
36787 case V2DI_FTYPE_V16QI_V16QI:
36788 case V2DI_FTYPE_V4SI_V4SI:
36789 case V2UDI_FTYPE_V4USI_V4USI:
36790 case V2DI_FTYPE_V2DI_V16QI:
36791 case V2DI_FTYPE_V2DF_V2DF:
36792 case V2SI_FTYPE_V2SI_V2SI:
36793 case V2SI_FTYPE_V4HI_V4HI:
36794 case V2SI_FTYPE_V2SF_V2SF:
36795 case V2DF_FTYPE_V2DF_V2DF:
36796 case V2DF_FTYPE_V2DF_V4SF:
36797 case V2DF_FTYPE_V2DF_V2DI:
36798 case V2DF_FTYPE_V2DF_DI:
36799 case V2DF_FTYPE_V2DF_SI:
36800 case V2DF_FTYPE_V2DF_UINT:
36801 case V2DF_FTYPE_V2DF_UINT64:
36802 case V2SF_FTYPE_V2SF_V2SF:
36803 case V1DI_FTYPE_V1DI_V1DI:
36804 case V1DI_FTYPE_V8QI_V8QI:
36805 case V1DI_FTYPE_V2SI_V2SI:
36806 case V32QI_FTYPE_V16HI_V16HI:
36807 case V16HI_FTYPE_V8SI_V8SI:
36808 case V32QI_FTYPE_V32QI_V32QI:
36809 case V16HI_FTYPE_V32QI_V32QI:
36810 case V16HI_FTYPE_V16HI_V16HI:
36811 case V8SI_FTYPE_V4DF_V4DF:
36812 case V8SI_FTYPE_V8SI_V8SI:
36813 case V8SI_FTYPE_V16HI_V16HI:
36814 case V4DI_FTYPE_V4DI_V4DI:
36815 case V4DI_FTYPE_V8SI_V8SI:
36816 case V4UDI_FTYPE_V8USI_V8USI:
36817 case QI_FTYPE_V8DI_V8DI:
36818 case V8DI_FTYPE_V64QI_V64QI:
36819 case HI_FTYPE_V16SI_V16SI:
36820 if (comparison == UNKNOWN)
36821 return ix86_expand_binop_builtin (icode, exp, target);
36822 nargs = 2;
36823 break;
36824 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36825 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36826 gcc_assert (comparison != UNKNOWN);
36827 nargs = 2;
36828 swap = true;
36829 break;
36830 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36831 case V16HI_FTYPE_V16HI_SI_COUNT:
36832 case V8SI_FTYPE_V8SI_V4SI_COUNT:
36833 case V8SI_FTYPE_V8SI_SI_COUNT:
36834 case V4DI_FTYPE_V4DI_V2DI_COUNT:
36835 case V4DI_FTYPE_V4DI_INT_COUNT:
36836 case V8HI_FTYPE_V8HI_V8HI_COUNT:
36837 case V8HI_FTYPE_V8HI_SI_COUNT:
36838 case V4SI_FTYPE_V4SI_V4SI_COUNT:
36839 case V4SI_FTYPE_V4SI_SI_COUNT:
36840 case V4HI_FTYPE_V4HI_V4HI_COUNT:
36841 case V4HI_FTYPE_V4HI_SI_COUNT:
36842 case V2DI_FTYPE_V2DI_V2DI_COUNT:
36843 case V2DI_FTYPE_V2DI_SI_COUNT:
36844 case V2SI_FTYPE_V2SI_V2SI_COUNT:
36845 case V2SI_FTYPE_V2SI_SI_COUNT:
36846 case V1DI_FTYPE_V1DI_V1DI_COUNT:
36847 case V1DI_FTYPE_V1DI_SI_COUNT:
36848 nargs = 2;
36849 last_arg_count = true;
36850 break;
36851 case UINT64_FTYPE_UINT64_UINT64:
36852 case UINT_FTYPE_UINT_UINT:
36853 case UINT_FTYPE_UINT_USHORT:
36854 case UINT_FTYPE_UINT_UCHAR:
36855 case UINT16_FTYPE_UINT16_INT:
36856 case UINT8_FTYPE_UINT8_INT:
36857 case HI_FTYPE_HI_HI:
36858 case SI_FTYPE_SI_SI:
36859 case DI_FTYPE_DI_DI:
36860 case V16SI_FTYPE_V8DF_V8DF:
36861 nargs = 2;
36862 break;
36863 case V2DI_FTYPE_V2DI_INT_CONVERT:
36864 nargs = 2;
36865 rmode = V1TImode;
36866 nargs_constant = 1;
36867 break;
36868 case V4DI_FTYPE_V4DI_INT_CONVERT:
36869 nargs = 2;
36870 rmode = V2TImode;
36871 nargs_constant = 1;
36872 break;
36873 case V8DI_FTYPE_V8DI_INT_CONVERT:
36874 nargs = 2;
36875 rmode = V4TImode;
36876 nargs_constant = 1;
36877 break;
36878 case V8HI_FTYPE_V8HI_INT:
36879 case V8HI_FTYPE_V8SF_INT:
36880 case V16HI_FTYPE_V16SF_INT:
36881 case V8HI_FTYPE_V4SF_INT:
36882 case V8SF_FTYPE_V8SF_INT:
36883 case V4SF_FTYPE_V16SF_INT:
36884 case V16SF_FTYPE_V16SF_INT:
36885 case V4SI_FTYPE_V4SI_INT:
36886 case V4SI_FTYPE_V8SI_INT:
36887 case V4HI_FTYPE_V4HI_INT:
36888 case V4DF_FTYPE_V4DF_INT:
36889 case V4DF_FTYPE_V8DF_INT:
36890 case V4SF_FTYPE_V4SF_INT:
36891 case V4SF_FTYPE_V8SF_INT:
36892 case V2DI_FTYPE_V2DI_INT:
36893 case V2DF_FTYPE_V2DF_INT:
36894 case V2DF_FTYPE_V4DF_INT:
36895 case V16HI_FTYPE_V16HI_INT:
36896 case V8SI_FTYPE_V8SI_INT:
36897 case V16SI_FTYPE_V16SI_INT:
36898 case V4SI_FTYPE_V16SI_INT:
36899 case V4DI_FTYPE_V4DI_INT:
36900 case V2DI_FTYPE_V4DI_INT:
36901 case V4DI_FTYPE_V8DI_INT:
36902 case HI_FTYPE_HI_INT:
36903 case QI_FTYPE_V4SF_INT:
36904 case QI_FTYPE_V2DF_INT:
36905 nargs = 2;
36906 nargs_constant = 1;
36907 break;
36908 case V16QI_FTYPE_V16QI_V16QI_V16QI:
36909 case V8SF_FTYPE_V8SF_V8SF_V8SF:
36910 case V4DF_FTYPE_V4DF_V4DF_V4DF:
36911 case V4SF_FTYPE_V4SF_V4SF_V4SF:
36912 case V2DF_FTYPE_V2DF_V2DF_V2DF:
36913 case V32QI_FTYPE_V32QI_V32QI_V32QI:
36914 case HI_FTYPE_V16SI_V16SI_HI:
36915 case QI_FTYPE_V8DI_V8DI_QI:
36916 case V16HI_FTYPE_V16SI_V16HI_HI:
36917 case V16QI_FTYPE_V16SI_V16QI_HI:
36918 case V16QI_FTYPE_V8DI_V16QI_QI:
36919 case V16SF_FTYPE_V16SF_V16SF_HI:
36920 case V16SF_FTYPE_V16SF_V16SF_V16SF:
36921 case V16SF_FTYPE_V16SF_V16SI_V16SF:
36922 case V16SF_FTYPE_V16SI_V16SF_HI:
36923 case V16SF_FTYPE_V16SI_V16SF_V16SF:
36924 case V16SF_FTYPE_V4SF_V16SF_HI:
36925 case V16SI_FTYPE_SI_V16SI_HI:
36926 case V16SI_FTYPE_V16HI_V16SI_HI:
36927 case V16SI_FTYPE_V16QI_V16SI_HI:
36928 case V16SI_FTYPE_V16SF_V16SI_HI:
36929 case V8SF_FTYPE_V4SF_V8SF_QI:
36930 case V4DF_FTYPE_V2DF_V4DF_QI:
36931 case V8SI_FTYPE_V4SI_V8SI_QI:
36932 case V8SI_FTYPE_SI_V8SI_QI:
36933 case V4SI_FTYPE_V4SI_V4SI_QI:
36934 case V4SI_FTYPE_SI_V4SI_QI:
36935 case V4DI_FTYPE_V2DI_V4DI_QI:
36936 case V4DI_FTYPE_DI_V4DI_QI:
36937 case V2DI_FTYPE_V2DI_V2DI_QI:
36938 case V2DI_FTYPE_DI_V2DI_QI:
36939 case V64QI_FTYPE_V64QI_V64QI_DI:
36940 case V64QI_FTYPE_V16QI_V64QI_DI:
36941 case V64QI_FTYPE_QI_V64QI_DI:
36942 case V32QI_FTYPE_V32QI_V32QI_SI:
36943 case V32QI_FTYPE_V16QI_V32QI_SI:
36944 case V32QI_FTYPE_QI_V32QI_SI:
36945 case V16QI_FTYPE_V16QI_V16QI_HI:
36946 case V16QI_FTYPE_QI_V16QI_HI:
36947 case V32HI_FTYPE_V8HI_V32HI_SI:
36948 case V32HI_FTYPE_HI_V32HI_SI:
36949 case V16HI_FTYPE_V8HI_V16HI_HI:
36950 case V16HI_FTYPE_HI_V16HI_HI:
36951 case V8HI_FTYPE_V8HI_V8HI_QI:
36952 case V8HI_FTYPE_HI_V8HI_QI:
36953 case V8SF_FTYPE_V8HI_V8SF_QI:
36954 case V4SF_FTYPE_V8HI_V4SF_QI:
36955 case V8SI_FTYPE_V8SF_V8SI_QI:
36956 case V4SI_FTYPE_V4SF_V4SI_QI:
36957 case V8DI_FTYPE_V8SF_V8DI_QI:
36958 case V4DI_FTYPE_V4SF_V4DI_QI:
36959 case V2DI_FTYPE_V4SF_V2DI_QI:
36960 case V8SF_FTYPE_V8DI_V8SF_QI:
36961 case V4SF_FTYPE_V4DI_V4SF_QI:
36962 case V4SF_FTYPE_V2DI_V4SF_QI:
36963 case V8DF_FTYPE_V8DI_V8DF_QI:
36964 case V4DF_FTYPE_V4DI_V4DF_QI:
36965 case V2DF_FTYPE_V2DI_V2DF_QI:
36966 case V16QI_FTYPE_V8HI_V16QI_QI:
36967 case V16QI_FTYPE_V16HI_V16QI_HI:
36968 case V16QI_FTYPE_V4SI_V16QI_QI:
36969 case V16QI_FTYPE_V8SI_V16QI_QI:
36970 case V8HI_FTYPE_V4SI_V8HI_QI:
36971 case V8HI_FTYPE_V8SI_V8HI_QI:
36972 case V16QI_FTYPE_V2DI_V16QI_QI:
36973 case V16QI_FTYPE_V4DI_V16QI_QI:
36974 case V8HI_FTYPE_V2DI_V8HI_QI:
36975 case V8HI_FTYPE_V4DI_V8HI_QI:
36976 case V4SI_FTYPE_V2DI_V4SI_QI:
36977 case V4SI_FTYPE_V4DI_V4SI_QI:
36978 case V32QI_FTYPE_V32HI_V32QI_SI:
36979 case HI_FTYPE_V16QI_V16QI_HI:
36980 case SI_FTYPE_V32QI_V32QI_SI:
36981 case DI_FTYPE_V64QI_V64QI_DI:
36982 case QI_FTYPE_V8HI_V8HI_QI:
36983 case HI_FTYPE_V16HI_V16HI_HI:
36984 case SI_FTYPE_V32HI_V32HI_SI:
36985 case QI_FTYPE_V4SI_V4SI_QI:
36986 case QI_FTYPE_V8SI_V8SI_QI:
36987 case QI_FTYPE_V2DI_V2DI_QI:
36988 case QI_FTYPE_V4DI_V4DI_QI:
36989 case V4SF_FTYPE_V2DF_V4SF_QI:
36990 case V4SF_FTYPE_V4DF_V4SF_QI:
36991 case V16SI_FTYPE_V16SI_V16SI_HI:
36992 case V16SI_FTYPE_V16SI_V16SI_V16SI:
36993 case V16SI_FTYPE_V4SI_V16SI_HI:
36994 case V2DI_FTYPE_V2DI_V2DI_V2DI:
36995 case V2DI_FTYPE_V4SI_V2DI_QI:
36996 case V2DI_FTYPE_V8HI_V2DI_QI:
36997 case V2DI_FTYPE_V16QI_V2DI_QI:
36998 case V4DI_FTYPE_V4DI_V4DI_QI:
36999 case V4DI_FTYPE_V4SI_V4DI_QI:
37000 case V4DI_FTYPE_V8HI_V4DI_QI:
37001 case V4DI_FTYPE_V16QI_V4DI_QI:
37002 case V8DI_FTYPE_V8DF_V8DI_QI:
37003 case V4DI_FTYPE_V4DF_V4DI_QI:
37004 case V2DI_FTYPE_V2DF_V2DI_QI:
37005 case V4SI_FTYPE_V4DF_V4SI_QI:
37006 case V4SI_FTYPE_V2DF_V4SI_QI:
37007 case V4SI_FTYPE_V8HI_V4SI_QI:
37008 case V4SI_FTYPE_V16QI_V4SI_QI:
37009 case V8SI_FTYPE_V8SI_V8SI_V8SI:
37010 case V4DI_FTYPE_V4DI_V4DI_V4DI:
37011 case V8DF_FTYPE_V2DF_V8DF_QI:
37012 case V8DF_FTYPE_V4DF_V8DF_QI:
37013 case V8DF_FTYPE_V8DF_V8DF_QI:
37014 case V8DF_FTYPE_V8DF_V8DF_V8DF:
37015 case V8SF_FTYPE_V8SF_V8SF_QI:
37016 case V8SF_FTYPE_V8SI_V8SF_QI:
37017 case V4DF_FTYPE_V4DF_V4DF_QI:
37018 case V4SF_FTYPE_V4SF_V4SF_QI:
37019 case V2DF_FTYPE_V2DF_V2DF_QI:
37020 case V2DF_FTYPE_V4SF_V2DF_QI:
37021 case V2DF_FTYPE_V4SI_V2DF_QI:
37022 case V4SF_FTYPE_V4SI_V4SF_QI:
37023 case V4DF_FTYPE_V4SF_V4DF_QI:
37024 case V4DF_FTYPE_V4SI_V4DF_QI:
37025 case V8SI_FTYPE_V8SI_V8SI_QI:
37026 case V8SI_FTYPE_V8HI_V8SI_QI:
37027 case V8SI_FTYPE_V16QI_V8SI_QI:
37028 case V8DF_FTYPE_V8DF_V8DI_V8DF:
37029 case V8DF_FTYPE_V8DI_V8DF_V8DF:
37030 case V8DF_FTYPE_V8SF_V8DF_QI:
37031 case V8DF_FTYPE_V8SI_V8DF_QI:
37032 case V8DI_FTYPE_DI_V8DI_QI:
37033 case V16SF_FTYPE_V8SF_V16SF_HI:
37034 case V16SI_FTYPE_V8SI_V16SI_HI:
37035 case V16HI_FTYPE_V16HI_V16HI_HI:
37036 case V8HI_FTYPE_V16QI_V8HI_QI:
37037 case V16HI_FTYPE_V16QI_V16HI_HI:
37038 case V32HI_FTYPE_V32HI_V32HI_SI:
37039 case V32HI_FTYPE_V32QI_V32HI_SI:
37040 case V8DI_FTYPE_V16QI_V8DI_QI:
37041 case V8DI_FTYPE_V2DI_V8DI_QI:
37042 case V8DI_FTYPE_V4DI_V8DI_QI:
37043 case V8DI_FTYPE_V8DI_V8DI_QI:
37044 case V8DI_FTYPE_V8DI_V8DI_V8DI:
37045 case V8DI_FTYPE_V8HI_V8DI_QI:
37046 case V8DI_FTYPE_V8SI_V8DI_QI:
37047 case V8HI_FTYPE_V8DI_V8HI_QI:
37048 case V8SF_FTYPE_V8DF_V8SF_QI:
37049 case V8SI_FTYPE_V8DF_V8SI_QI:
37050 case V8SI_FTYPE_V8DI_V8SI_QI:
37051 case V4SI_FTYPE_V4SI_V4SI_V4SI:
37052 nargs = 3;
37053 break;
37054 case V32QI_FTYPE_V32QI_V32QI_INT:
37055 case V16HI_FTYPE_V16HI_V16HI_INT:
37056 case V16QI_FTYPE_V16QI_V16QI_INT:
37057 case V4DI_FTYPE_V4DI_V4DI_INT:
37058 case V8HI_FTYPE_V8HI_V8HI_INT:
37059 case V8SI_FTYPE_V8SI_V8SI_INT:
37060 case V8SI_FTYPE_V8SI_V4SI_INT:
37061 case V8SF_FTYPE_V8SF_V8SF_INT:
37062 case V8SF_FTYPE_V8SF_V4SF_INT:
37063 case V4SI_FTYPE_V4SI_V4SI_INT:
37064 case V4DF_FTYPE_V4DF_V4DF_INT:
37065 case V16SF_FTYPE_V16SF_V16SF_INT:
37066 case V16SF_FTYPE_V16SF_V4SF_INT:
37067 case V16SI_FTYPE_V16SI_V4SI_INT:
37068 case V4DF_FTYPE_V4DF_V2DF_INT:
37069 case V4SF_FTYPE_V4SF_V4SF_INT:
37070 case V2DI_FTYPE_V2DI_V2DI_INT:
37071 case V4DI_FTYPE_V4DI_V2DI_INT:
37072 case V2DF_FTYPE_V2DF_V2DF_INT:
37073 case QI_FTYPE_V8DI_V8DI_INT:
37074 case QI_FTYPE_V8DF_V8DF_INT:
37075 case QI_FTYPE_V2DF_V2DF_INT:
37076 case QI_FTYPE_V4SF_V4SF_INT:
37077 case HI_FTYPE_V16SI_V16SI_INT:
37078 case HI_FTYPE_V16SF_V16SF_INT:
37079 nargs = 3;
37080 nargs_constant = 1;
37081 break;
37082 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
37083 nargs = 3;
37084 rmode = V4DImode;
37085 nargs_constant = 1;
37086 break;
37087 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
37088 nargs = 3;
37089 rmode = V2DImode;
37090 nargs_constant = 1;
37091 break;
37092 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
37093 nargs = 3;
37094 rmode = DImode;
37095 nargs_constant = 1;
37096 break;
37097 case V2DI_FTYPE_V2DI_UINT_UINT:
37098 nargs = 3;
37099 nargs_constant = 2;
37100 break;
37101 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
37102 nargs = 3;
37103 rmode = V8DImode;
37104 nargs_constant = 1;
37105 break;
37106 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
37107 nargs = 5;
37108 rmode = V8DImode;
37109 mask_pos = 2;
37110 nargs_constant = 1;
37111 break;
37112 case QI_FTYPE_V8DF_INT_QI:
37113 case QI_FTYPE_V4DF_INT_QI:
37114 case QI_FTYPE_V2DF_INT_QI:
37115 case HI_FTYPE_V16SF_INT_HI:
37116 case QI_FTYPE_V8SF_INT_QI:
37117 case QI_FTYPE_V4SF_INT_QI:
37118 nargs = 3;
37119 mask_pos = 1;
37120 nargs_constant = 1;
37121 break;
37122 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
37123 nargs = 5;
37124 rmode = V4DImode;
37125 mask_pos = 2;
37126 nargs_constant = 1;
37127 break;
37128 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
37129 nargs = 5;
37130 rmode = V2DImode;
37131 mask_pos = 2;
37132 nargs_constant = 1;
37133 break;
37134 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
37135 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
37136 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
37137 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
37138 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
37139 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
37140 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
37141 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
37142 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
37143 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
37144 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
37145 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
37146 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
37147 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
37148 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
37149 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
37150 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
37151 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
37152 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
37153 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
37154 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
37155 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
37156 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
37157 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
37158 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
37159 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
37160 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
37161 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
37162 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
37163 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
37164 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
37165 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
37166 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
37167 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
37168 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
37169 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
37170 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
37171 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
37172 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
37173 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
37174 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
37175 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
37176 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
37177 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
37178 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
37179 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
37180 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
37181 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
37182 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
37183 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
37184 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
37185 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
37186 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
37187 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
37188 nargs = 4;
37189 break;
37190 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37191 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37192 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37193 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37194 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37195 nargs = 4;
37196 nargs_constant = 1;
37197 break;
37198 case QI_FTYPE_V4DI_V4DI_INT_QI:
37199 case QI_FTYPE_V8SI_V8SI_INT_QI:
37200 case QI_FTYPE_V4DF_V4DF_INT_QI:
37201 case QI_FTYPE_V8SF_V8SF_INT_QI:
37202 case QI_FTYPE_V2DI_V2DI_INT_QI:
37203 case QI_FTYPE_V4SI_V4SI_INT_QI:
37204 case QI_FTYPE_V2DF_V2DF_INT_QI:
37205 case QI_FTYPE_V4SF_V4SF_INT_QI:
37206 case DI_FTYPE_V64QI_V64QI_INT_DI:
37207 case SI_FTYPE_V32QI_V32QI_INT_SI:
37208 case HI_FTYPE_V16QI_V16QI_INT_HI:
37209 case SI_FTYPE_V32HI_V32HI_INT_SI:
37210 case HI_FTYPE_V16HI_V16HI_INT_HI:
37211 case QI_FTYPE_V8HI_V8HI_INT_QI:
37212 nargs = 4;
37213 mask_pos = 1;
37214 nargs_constant = 1;
37215 break;
37216 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37217 nargs = 4;
37218 nargs_constant = 2;
37219 break;
37220 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37221 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37222 nargs = 4;
37223 break;
37224 case QI_FTYPE_V8DI_V8DI_INT_QI:
37225 case HI_FTYPE_V16SI_V16SI_INT_HI:
37226 case QI_FTYPE_V8DF_V8DF_INT_QI:
37227 case HI_FTYPE_V16SF_V16SF_INT_HI:
37228 mask_pos = 1;
37229 nargs = 4;
37230 nargs_constant = 1;
37231 break;
37232 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37233 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37234 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37235 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37236 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37237 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37238 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37239 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37240 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37241 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37242 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37243 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37244 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37245 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37246 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37247 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37248 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37249 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37250 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37251 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37252 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37253 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37254 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37255 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37256 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37257 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37258 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37259 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37260 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37261 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37262 nargs = 4;
37263 mask_pos = 2;
37264 nargs_constant = 1;
37265 break;
37266 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37267 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37268 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37269 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37270 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37271 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37272 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37273 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37274 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37275 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37276 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37277 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37278 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37279 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37280 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37281 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37282 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37283 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37284 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37285 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37286 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37287 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37288 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37289 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37290 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37291 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37292 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37293 nargs = 5;
37294 mask_pos = 2;
37295 nargs_constant = 1;
37296 break;
37297 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37298 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37299 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37300 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37301 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37302 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37303 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37304 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37305 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37306 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37307 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37308 nargs = 5;
37310 mask_pos = 1;
37311 nargs_constant = 1;
37312 break;
37314 default:
37315 gcc_unreachable ();
37318 gcc_assert (nargs <= ARRAY_SIZE (args));
37320 if (comparison != UNKNOWN)
37322 gcc_assert (nargs == 2);
37323 return ix86_expand_sse_compare (d, exp, target, swap);
37326 if (rmode == VOIDmode || rmode == tmode)
37328 if (optimize
37329 || target == 0
37330 || GET_MODE (target) != tmode
37331 || !insn_p->operand[0].predicate (target, tmode))
37332 target = gen_reg_rtx (tmode);
37333 real_target = target;
37335 else
37337 real_target = gen_reg_rtx (tmode);
37338 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
37341 for (i = 0; i < nargs; i++)
37343 tree arg = CALL_EXPR_ARG (exp, i);
37344 rtx op = expand_normal (arg);
37345 machine_mode mode = insn_p->operand[i + 1].mode;
37346 bool match = insn_p->operand[i + 1].predicate (op, mode);
37348 if (last_arg_count && (i + 1) == nargs)
37350 /* SIMD shift insns take either an 8-bit immediate or a register as
37351    the count, but the builtin functions take an int.  If the count
37352    doesn't match, put it in a register (see the sketch after this function).  */
37353 if (!match)
37355 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37356 if (!insn_p->operand[i + 1].predicate (op, mode))
37357 op = copy_to_reg (op);
37360 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37361 (!mask_pos && (nargs - i) <= nargs_constant))
37363 if (!match)
37364 switch (icode)
37366 case CODE_FOR_avx_vinsertf128v4di:
37367 case CODE_FOR_avx_vextractf128v4di:
37368 error ("the last argument must be a 1-bit immediate");
37369 return const0_rtx;
37371 case CODE_FOR_avx512f_cmpv8di3_mask:
37372 case CODE_FOR_avx512f_cmpv16si3_mask:
37373 case CODE_FOR_avx512f_ucmpv8di3_mask:
37374 case CODE_FOR_avx512f_ucmpv16si3_mask:
37375 case CODE_FOR_avx512vl_cmpv4di3_mask:
37376 case CODE_FOR_avx512vl_cmpv8si3_mask:
37377 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37378 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37379 case CODE_FOR_avx512vl_cmpv2di3_mask:
37380 case CODE_FOR_avx512vl_cmpv4si3_mask:
37381 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37382 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37383 error ("the last argument must be a 3-bit immediate");
37384 return const0_rtx;
37386 case CODE_FOR_sse4_1_roundsd:
37387 case CODE_FOR_sse4_1_roundss:
37389 case CODE_FOR_sse4_1_roundpd:
37390 case CODE_FOR_sse4_1_roundps:
37391 case CODE_FOR_avx_roundpd256:
37392 case CODE_FOR_avx_roundps256:
37394 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37395 case CODE_FOR_sse4_1_roundps_sfix:
37396 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37397 case CODE_FOR_avx_roundps_sfix256:
37399 case CODE_FOR_sse4_1_blendps:
37400 case CODE_FOR_avx_blendpd256:
37401 case CODE_FOR_avx_vpermilv4df:
37402 case CODE_FOR_avx_vpermilv4df_mask:
37403 case CODE_FOR_avx512f_getmantv8df_mask:
37404 case CODE_FOR_avx512f_getmantv16sf_mask:
37405 case CODE_FOR_avx512vl_getmantv8sf_mask:
37406 case CODE_FOR_avx512vl_getmantv4df_mask:
37407 case CODE_FOR_avx512vl_getmantv4sf_mask:
37408 case CODE_FOR_avx512vl_getmantv2df_mask:
37409 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37410 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37411 case CODE_FOR_avx512dq_rangepv4df_mask:
37412 case CODE_FOR_avx512dq_rangepv8sf_mask:
37413 case CODE_FOR_avx512dq_rangepv2df_mask:
37414 case CODE_FOR_avx512dq_rangepv4sf_mask:
37415 case CODE_FOR_avx_shufpd256_mask:
37416 error ("the last argument must be a 4-bit immediate");
37417 return const0_rtx;
37419 case CODE_FOR_sha1rnds4:
37420 case CODE_FOR_sse4_1_blendpd:
37421 case CODE_FOR_avx_vpermilv2df:
37422 case CODE_FOR_avx_vpermilv2df_mask:
37423 case CODE_FOR_xop_vpermil2v2df3:
37424 case CODE_FOR_xop_vpermil2v4sf3:
37425 case CODE_FOR_xop_vpermil2v4df3:
37426 case CODE_FOR_xop_vpermil2v8sf3:
37427 case CODE_FOR_avx512f_vinsertf32x4_mask:
37428 case CODE_FOR_avx512f_vinserti32x4_mask:
37429 case CODE_FOR_avx512f_vextractf32x4_mask:
37430 case CODE_FOR_avx512f_vextracti32x4_mask:
37431 case CODE_FOR_sse2_shufpd:
37432 case CODE_FOR_sse2_shufpd_mask:
37433 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37434 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37435 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37436 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37437 error ("the last argument must be a 2-bit immediate");
37438 return const0_rtx;
37440 case CODE_FOR_avx_vextractf128v4df:
37441 case CODE_FOR_avx_vextractf128v8sf:
37442 case CODE_FOR_avx_vextractf128v8si:
37443 case CODE_FOR_avx_vinsertf128v4df:
37444 case CODE_FOR_avx_vinsertf128v8sf:
37445 case CODE_FOR_avx_vinsertf128v8si:
37446 case CODE_FOR_avx512f_vinsertf64x4_mask:
37447 case CODE_FOR_avx512f_vinserti64x4_mask:
37448 case CODE_FOR_avx512f_vextractf64x4_mask:
37449 case CODE_FOR_avx512f_vextracti64x4_mask:
37450 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37451 case CODE_FOR_avx512dq_vinserti32x8_mask:
37452 case CODE_FOR_avx512vl_vinsertv4df:
37453 case CODE_FOR_avx512vl_vinsertv4di:
37454 case CODE_FOR_avx512vl_vinsertv8sf:
37455 case CODE_FOR_avx512vl_vinsertv8si:
37456 error ("the last argument must be a 1-bit immediate");
37457 return const0_rtx;
37459 case CODE_FOR_avx_vmcmpv2df3:
37460 case CODE_FOR_avx_vmcmpv4sf3:
37461 case CODE_FOR_avx_cmpv2df3:
37462 case CODE_FOR_avx_cmpv4sf3:
37463 case CODE_FOR_avx_cmpv4df3:
37464 case CODE_FOR_avx_cmpv8sf3:
37465 case CODE_FOR_avx512f_cmpv8df3_mask:
37466 case CODE_FOR_avx512f_cmpv16sf3_mask:
37467 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37468 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37469 error ("the last argument must be a 5-bit immediate");
37470 return const0_rtx;
37472 default:
37473 switch (nargs_constant)
37475 case 2:
37476 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37477 (!mask_pos && (nargs - i) == nargs_constant))
37479 error ("the next to last argument must be an 8-bit immediate");
37480 break;
37482 case 1:
37483 error ("the last argument must be an 8-bit immediate");
37484 break;
37485 default:
37486 gcc_unreachable ();
37488 return const0_rtx;
37491 else
37493 if (VECTOR_MODE_P (mode))
37494 op = safe_vector_operand (op, mode);
37496 /* If we aren't optimizing, only allow one memory operand to
37497 be generated. */
37498 if (memory_operand (op, mode))
37499 num_memory++;
37501 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37503 if (optimize || !match || num_memory > 1)
37504 op = copy_to_mode_reg (mode, op);
37506 else
37508 op = copy_to_reg (op);
37509 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37513 args[i].op = op;
37514 args[i].mode = mode;
37517 switch (nargs)
37519 case 1:
37520 pat = GEN_FCN (icode) (real_target, args[0].op);
37521 break;
37522 case 2:
37523 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37524 break;
37525 case 3:
37526 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37527 args[2].op);
37528 break;
37529 case 4:
37530 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37531 args[2].op, args[3].op);
37532 break;
37533 case 5:
37534 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37535 args[2].op, args[3].op, args[4].op);
      break;
37536 case 6:
37537 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37538 args[2].op, args[3].op, args[4].op,
37539 args[5].op);
37540 break;
37541 default:
37542 gcc_unreachable ();
37545 if (! pat)
37546 return 0;
37548 emit_insn (pat);
37549 return target;
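/* Illustrative sketch, not part of this file, referenced from the
   shift-count comment inside ix86_expand_args_builtin above.
   _mm_slli_epi32 is the real SSE2 intrinsic from emmintrin.h and takes a
   plain int count; when that count is not a compile-time constant the
   expander cannot use the immediate form of the shift, so the
   last_arg_count path copies it into a register instead.  */
#include <emmintrin.h>

static __m128i
shift_left_each_lane (__m128i v, int count)
{
  /* A non-constant count is still accepted by the builtin; it simply
     ends up in a register rather than an 8-bit immediate.  */
  return _mm_slli_epi32 (v, count);
}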
37552 /* Transform a pattern of the following layout:
37553      (parallel [
37554 	  set (A B)
37555 	  (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37557    into:
37558      (set (A B))
   Or:
37561      (parallel [ A B
	  ...
37563 	  (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
	  ...
	  ])
37566    into:
37567      (parallel [ A B ... ])  */
37569 static rtx
37570 ix86_erase_embedded_rounding (rtx pat)
37572 if (GET_CODE (pat) == INSN)
37573 pat = PATTERN (pat);
37575 gcc_assert (GET_CODE (pat) == PARALLEL);
37577 if (XVECLEN (pat, 0) == 2)
37579 rtx p0 = XVECEXP (pat, 0, 0);
37580 rtx p1 = XVECEXP (pat, 0, 1);
37582 gcc_assert (GET_CODE (p0) == SET
37583 && GET_CODE (p1) == UNSPEC
37584 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37586 return p0;
37588 else
37590 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37591 int i = 0;
37592 int j = 0;
37594 for (; i < XVECLEN (pat, 0); ++i)
37596 rtx elem = XVECEXP (pat, 0, i);
37597 if (GET_CODE (elem) != UNSPEC
37598 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37599 res [j++] = elem;
37602 /* No more than 1 occurrence was removed. */
37603 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37605 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37609 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37610 with rounding. */
37611 static rtx
37612 ix86_expand_sse_comi_round (const struct builtin_description *d,
37613 tree exp, rtx target)
37615 rtx pat, set_dst;
37616 tree arg0 = CALL_EXPR_ARG (exp, 0);
37617 tree arg1 = CALL_EXPR_ARG (exp, 1);
37618 tree arg2 = CALL_EXPR_ARG (exp, 2);
37619 tree arg3 = CALL_EXPR_ARG (exp, 3);
37620 rtx op0 = expand_normal (arg0);
37621 rtx op1 = expand_normal (arg1);
37622 rtx op2 = expand_normal (arg2);
37623 rtx op3 = expand_normal (arg3);
37624 enum insn_code icode = d->icode;
37625 const struct insn_data_d *insn_p = &insn_data[icode];
37626 machine_mode mode0 = insn_p->operand[0].mode;
37627 machine_mode mode1 = insn_p->operand[1].mode;
37628 enum rtx_code comparison = UNEQ;
37629 bool need_ucomi = false;
37631 /* See avxintrin.h for values. */
37632 enum rtx_code comi_comparisons[32] =
37634 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37635 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37636 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37638 bool need_ucomi_values[32] =
37640 true, false, false, true, true, false, false, true,
37641 true, false, false, true, true, false, false, true,
37642 false, true, true, false, false, true, true, false,
37643 false, true, true, false, false, true, true, false
37646 if (!CONST_INT_P (op2))
37648 error ("the third argument must be comparison constant");
37649 return const0_rtx;
37651 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37653 error ("incorect comparison mode");
37654 return const0_rtx;
37657 if (!insn_p->operand[2].predicate (op3, SImode))
37659 error ("incorrect rounding operand");
37660 return const0_rtx;
37663 comparison = comi_comparisons[INTVAL (op2)];
37664 need_ucomi = need_ucomi_values[INTVAL (op2)];
37666 if (VECTOR_MODE_P (mode0))
37667 op0 = safe_vector_operand (op0, mode0);
37668 if (VECTOR_MODE_P (mode1))
37669 op1 = safe_vector_operand (op1, mode1);
37671 target = gen_reg_rtx (SImode);
37672 emit_move_insn (target, const0_rtx);
37673 target = gen_rtx_SUBREG (QImode, target, 0);
37675 if ((optimize && !register_operand (op0, mode0))
37676 || !insn_p->operand[0].predicate (op0, mode0))
37677 op0 = copy_to_mode_reg (mode0, op0);
37678 if ((optimize && !register_operand (op1, mode1))
37679 || !insn_p->operand[1].predicate (op1, mode1))
37680 op1 = copy_to_mode_reg (mode1, op1);
37682 if (need_ucomi)
37683 icode = icode == CODE_FOR_sse_comi_round
37684 ? CODE_FOR_sse_ucomi_round
37685 : CODE_FOR_sse2_ucomi_round;
37687 pat = GEN_FCN (icode) (op0, op1, op3);
37688 if (! pat)
37689 return 0;
37691 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37692 if (INTVAL (op3) == NO_ROUND)
37694 pat = ix86_erase_embedded_rounding (pat);
37695 if (! pat)
37696 return 0;
37698 set_dst = SET_DEST (pat);
37700 else
37702 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37703 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37706 emit_insn (pat);
37707 emit_insn (gen_rtx_SET (VOIDmode,
37708 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37709 gen_rtx_fmt_ee (comparison, QImode,
37710 set_dst,
37711 const0_rtx)));
37713 return SUBREG_REG (target);
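/* Illustrative sketch, not part of this file: a plausible user-level entry
   into the comi-with-rounding expander above.  The intrinsic name
   _mm_comi_round_ss (AVX-512F, immintrin.h) is an assumption made for
   illustration; what is grounded in the code above is that the third
   argument indexes comi_comparisons[] (the _CMP_* values documented in
   avxintrin.h) and the fourth must satisfy the SAE/rounding predicate.  */
#include <immintrin.h>

static int
greater_than_suppressing_exceptions (__m128 a, __m128 b)
{
  /* _CMP_GT_OQ selects an ordered, quiet greater-than comparison;
     _MM_FROUND_NO_EXC requests the SAE form.  */
  return _mm_comi_round_ss (a, b, _CMP_GT_OQ, _MM_FROUND_NO_EXC);
}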
37716 static rtx
37717 ix86_expand_round_builtin (const struct builtin_description *d,
37718 tree exp, rtx target)
37720 rtx pat;
37721 unsigned int i, nargs;
37722 struct
37724 rtx op;
37725 machine_mode mode;
37726 } args[6];
37727 enum insn_code icode = d->icode;
37728 const struct insn_data_d *insn_p = &insn_data[icode];
37729 machine_mode tmode = insn_p->operand[0].mode;
37730 unsigned int nargs_constant = 0;
37731 unsigned int redundant_embed_rnd = 0;
37733 switch ((enum ix86_builtin_func_type) d->flag)
37735 case UINT64_FTYPE_V2DF_INT:
37736 case UINT64_FTYPE_V4SF_INT:
37737 case UINT_FTYPE_V2DF_INT:
37738 case UINT_FTYPE_V4SF_INT:
37739 case INT64_FTYPE_V2DF_INT:
37740 case INT64_FTYPE_V4SF_INT:
37741 case INT_FTYPE_V2DF_INT:
37742 case INT_FTYPE_V4SF_INT:
37743 nargs = 2;
37744 break;
37745 case V4SF_FTYPE_V4SF_UINT_INT:
37746 case V4SF_FTYPE_V4SF_UINT64_INT:
37747 case V2DF_FTYPE_V2DF_UINT64_INT:
37748 case V4SF_FTYPE_V4SF_INT_INT:
37749 case V4SF_FTYPE_V4SF_INT64_INT:
37750 case V2DF_FTYPE_V2DF_INT64_INT:
37751 case V4SF_FTYPE_V4SF_V4SF_INT:
37752 case V2DF_FTYPE_V2DF_V2DF_INT:
37753 case V4SF_FTYPE_V4SF_V2DF_INT:
37754 case V2DF_FTYPE_V2DF_V4SF_INT:
37755 nargs = 3;
37756 break;
37757 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37758 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37759 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37760 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37761 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37762 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37763 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37764 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37765 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37766 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37767 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37768 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37769 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37770 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37771 nargs = 4;
37772 break;
37773 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37774 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37775 nargs_constant = 2;
37776 nargs = 4;
37777 break;
37778 case INT_FTYPE_V4SF_V4SF_INT_INT:
37779 case INT_FTYPE_V2DF_V2DF_INT_INT:
37780 return ix86_expand_sse_comi_round (d, exp, target);
37781 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37782 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37783 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37784 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37785 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37786 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37787 nargs = 5;
37788 break;
37789 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37790 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37791 nargs_constant = 4;
37792 nargs = 5;
37793 break;
37794 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37795 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37796 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37797 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37798 nargs_constant = 3;
37799 nargs = 5;
37800 break;
37801 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37802 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37803 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37804 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37805 nargs = 6;
37806 nargs_constant = 4;
37807 break;
37808 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37809 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37810 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37811 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37812 nargs = 6;
37813 nargs_constant = 3;
37814 break;
37815 default:
37816 gcc_unreachable ();
37818 gcc_assert (nargs <= ARRAY_SIZE (args));
37820 if (optimize
37821 || target == 0
37822 || GET_MODE (target) != tmode
37823 || !insn_p->operand[0].predicate (target, tmode))
37824 target = gen_reg_rtx (tmode);
37826 for (i = 0; i < nargs; i++)
37828 tree arg = CALL_EXPR_ARG (exp, i);
37829 rtx op = expand_normal (arg);
37830 machine_mode mode = insn_p->operand[i + 1].mode;
37831 bool match = insn_p->operand[i + 1].predicate (op, mode);
37833 if (i == nargs - nargs_constant)
37835 if (!match)
37837 switch (icode)
37839 case CODE_FOR_avx512f_getmantv8df_mask_round:
37840 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37841 case CODE_FOR_avx512f_vgetmantv2df_round:
37842 case CODE_FOR_avx512f_vgetmantv4sf_round:
37843 error ("the immediate argument must be a 4-bit immediate");
37844 return const0_rtx;
37845 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37846 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37847 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37848 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37849 error ("the immediate argument must be a 5-bit immediate");
37850 return const0_rtx;
37851 default:
37852 error ("the immediate argument must be an 8-bit immediate");
37853 return const0_rtx;
37857 else if (i == nargs-1)
37859 if (!insn_p->operand[nargs].predicate (op, SImode))
37861 error ("incorrect rounding operand");
37862 return const0_rtx;
37865 /* If there is no rounding, use the normal version of the pattern. */
37866 if (INTVAL (op) == NO_ROUND)
37867 redundant_embed_rnd = 1;
37869 else
37871 if (VECTOR_MODE_P (mode))
37872 op = safe_vector_operand (op, mode);
37874 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37876 if (optimize || !match)
37877 op = copy_to_mode_reg (mode, op);
37879 else
37881 op = copy_to_reg (op);
37882 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37886 args[i].op = op;
37887 args[i].mode = mode;
37890 switch (nargs)
37892 case 1:
37893 pat = GEN_FCN (icode) (target, args[0].op);
37894 break;
37895 case 2:
37896 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37897 break;
37898 case 3:
37899 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37900 args[2].op);
37901 break;
37902 case 4:
37903 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37904 args[2].op, args[3].op);
37905 break;
37906 case 5:
37907 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37908 args[2].op, args[3].op, args[4].op);
      break;
37909 case 6:
37910 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37911 args[2].op, args[3].op, args[4].op,
37912 args[5].op);
37913 break;
37914 default:
37915 gcc_unreachable ();
37918 if (!pat)
37919 return 0;
37921 if (redundant_embed_rnd)
37922 pat = ix86_erase_embedded_rounding (pat);
37924 emit_insn (pat);
37925 return target;
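/* Illustrative sketch, not part of this file: an embedded-rounding
   intrinsic that goes through ix86_expand_round_builtin.  _mm_add_round_sd
   is the real AVX-512F intrinsic from immintrin.h; when its rounding
   argument is _MM_FROUND_CUR_DIRECTION (the NO_ROUND value tested above),
   redundant_embed_rnd is set and ix86_erase_embedded_rounding strips the
   UNSPEC_EMBEDDED_ROUNDING from the pattern.  Equating
   _MM_FROUND_CUR_DIRECTION with NO_ROUND is background stated here, not
   by this file.  */
#include <immintrin.h>

static __m128d
add_rounding_up (__m128d a, __m128d b)
{
  /* Explicit rounding mode: the embedded-rounding parallel is kept.  */
  return _mm_add_round_sd (a, b, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
}

static __m128d
add_current_rounding (__m128d a, __m128d b)
{
  /* Current rounding mode: the embedded rounding is erased and the
     plain add pattern is emitted.  */
  return _mm_add_round_sd (a, b, _MM_FROUND_CUR_DIRECTION);
}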
37928 /* Subroutine of ix86_expand_builtin to take care of special insns
37929 with variable number of operands. */
37931 static rtx
37932 ix86_expand_special_args_builtin (const struct builtin_description *d,
37933 tree exp, rtx target)
37935 tree arg;
37936 rtx pat, op;
37937 unsigned int i, nargs, arg_adjust, memory;
37938 bool aligned_mem = false;
37939 struct
37941 rtx op;
37942 machine_mode mode;
37943 } args[3];
37944 enum insn_code icode = d->icode;
37945 bool last_arg_constant = false;
37946 const struct insn_data_d *insn_p = &insn_data[icode];
37947 machine_mode tmode = insn_p->operand[0].mode;
37948 enum { load, store } klass;
37950 switch ((enum ix86_builtin_func_type) d->flag)
37952 case VOID_FTYPE_VOID:
37953 emit_insn (GEN_FCN (icode) (target));
37954 return 0;
37955 case VOID_FTYPE_UINT64:
37956 case VOID_FTYPE_UNSIGNED:
37957 nargs = 0;
37958 klass = store;
37959 memory = 0;
37960 break;
37962 case INT_FTYPE_VOID:
37963 case USHORT_FTYPE_VOID:
37964 case UINT64_FTYPE_VOID:
37965 case UNSIGNED_FTYPE_VOID:
37966 nargs = 0;
37967 klass = load;
37968 memory = 0;
37969 break;
37970 case UINT64_FTYPE_PUNSIGNED:
37971 case V2DI_FTYPE_PV2DI:
37972 case V4DI_FTYPE_PV4DI:
37973 case V32QI_FTYPE_PCCHAR:
37974 case V16QI_FTYPE_PCCHAR:
37975 case V8SF_FTYPE_PCV4SF:
37976 case V8SF_FTYPE_PCFLOAT:
37977 case V4SF_FTYPE_PCFLOAT:
37978 case V4DF_FTYPE_PCV2DF:
37979 case V4DF_FTYPE_PCDOUBLE:
37980 case V2DF_FTYPE_PCDOUBLE:
37981 case VOID_FTYPE_PVOID:
37982 case V16SI_FTYPE_PV4SI:
37983 case V16SF_FTYPE_PV4SF:
37984 case V8DI_FTYPE_PV4DI:
37985 case V8DI_FTYPE_PV8DI:
37986 case V8DF_FTYPE_PV4DF:
37987 nargs = 1;
37988 klass = load;
37989 memory = 0;
37990 switch (icode)
37992 case CODE_FOR_sse4_1_movntdqa:
37993 case CODE_FOR_avx2_movntdqa:
37994 case CODE_FOR_avx512f_movntdqa:
37995 aligned_mem = true;
37996 break;
37997 default:
37998 break;
38000 break;
38001 case VOID_FTYPE_PV2SF_V4SF:
38002 case VOID_FTYPE_PV8DI_V8DI:
38003 case VOID_FTYPE_PV4DI_V4DI:
38004 case VOID_FTYPE_PV2DI_V2DI:
38005 case VOID_FTYPE_PCHAR_V32QI:
38006 case VOID_FTYPE_PCHAR_V16QI:
38007 case VOID_FTYPE_PFLOAT_V16SF:
38008 case VOID_FTYPE_PFLOAT_V8SF:
38009 case VOID_FTYPE_PFLOAT_V4SF:
38010 case VOID_FTYPE_PDOUBLE_V8DF:
38011 case VOID_FTYPE_PDOUBLE_V4DF:
38012 case VOID_FTYPE_PDOUBLE_V2DF:
38013 case VOID_FTYPE_PLONGLONG_LONGLONG:
38014 case VOID_FTYPE_PULONGLONG_ULONGLONG:
38015 case VOID_FTYPE_PINT_INT:
38016 nargs = 1;
38017 klass = store;
38018 /* Reserve memory operand for target. */
38019 memory = ARRAY_SIZE (args);
38020 switch (icode)
38022 /* These builtins and instructions require the memory
38023 to be properly aligned. */
38024 case CODE_FOR_avx_movntv4di:
38025 case CODE_FOR_sse2_movntv2di:
38026 case CODE_FOR_avx_movntv8sf:
38027 case CODE_FOR_sse_movntv4sf:
38028 case CODE_FOR_sse4a_vmmovntv4sf:
38029 case CODE_FOR_avx_movntv4df:
38030 case CODE_FOR_sse2_movntv2df:
38031 case CODE_FOR_sse4a_vmmovntv2df:
38032 case CODE_FOR_sse2_movntidi:
38033 case CODE_FOR_sse_movntq:
38034 case CODE_FOR_sse2_movntisi:
38035 case CODE_FOR_avx512f_movntv16sf:
38036 case CODE_FOR_avx512f_movntv8df:
38037 case CODE_FOR_avx512f_movntv8di:
38038 aligned_mem = true;
38039 break;
38040 default:
38041 break;
38043 break;
38044 case V4SF_FTYPE_V4SF_PCV2SF:
38045 case V2DF_FTYPE_V2DF_PCDOUBLE:
38046 nargs = 2;
38047 klass = load;
38048 memory = 1;
38049 break;
38050 case V8SF_FTYPE_PCV8SF_V8SI:
38051 case V4DF_FTYPE_PCV4DF_V4DI:
38052 case V4SF_FTYPE_PCV4SF_V4SI:
38053 case V2DF_FTYPE_PCV2DF_V2DI:
38054 case V8SI_FTYPE_PCV8SI_V8SI:
38055 case V4DI_FTYPE_PCV4DI_V4DI:
38056 case V4SI_FTYPE_PCV4SI_V4SI:
38057 case V2DI_FTYPE_PCV2DI_V2DI:
38058 nargs = 2;
38059 klass = load;
38060 memory = 0;
38061 break;
38062 case VOID_FTYPE_PV8DF_V8DF_QI:
38063 case VOID_FTYPE_PV16SF_V16SF_HI:
38064 case VOID_FTYPE_PV8DI_V8DI_QI:
38065 case VOID_FTYPE_PV4DI_V4DI_QI:
38066 case VOID_FTYPE_PV2DI_V2DI_QI:
38067 case VOID_FTYPE_PV16SI_V16SI_HI:
38068 case VOID_FTYPE_PV8SI_V8SI_QI:
38069 case VOID_FTYPE_PV4SI_V4SI_QI:
38070 switch (icode)
38072 /* These builtins and instructions require the memory
38073 to be properly aligned. */
38074 case CODE_FOR_avx512f_storev16sf_mask:
38075 case CODE_FOR_avx512f_storev16si_mask:
38076 case CODE_FOR_avx512f_storev8df_mask:
38077 case CODE_FOR_avx512f_storev8di_mask:
38078 case CODE_FOR_avx512vl_storev8sf_mask:
38079 case CODE_FOR_avx512vl_storev8si_mask:
38080 case CODE_FOR_avx512vl_storev4df_mask:
38081 case CODE_FOR_avx512vl_storev4di_mask:
38082 case CODE_FOR_avx512vl_storev4sf_mask:
38083 case CODE_FOR_avx512vl_storev4si_mask:
38084 case CODE_FOR_avx512vl_storev2df_mask:
38085 case CODE_FOR_avx512vl_storev2di_mask:
38086 aligned_mem = true;
38087 break;
38088 default:
38089 break;
38091 /* FALLTHRU */
38092 case VOID_FTYPE_PV8SF_V8SI_V8SF:
38093 case VOID_FTYPE_PV4DF_V4DI_V4DF:
38094 case VOID_FTYPE_PV4SF_V4SI_V4SF:
38095 case VOID_FTYPE_PV2DF_V2DI_V2DF:
38096 case VOID_FTYPE_PV8SI_V8SI_V8SI:
38097 case VOID_FTYPE_PV4DI_V4DI_V4DI:
38098 case VOID_FTYPE_PV4SI_V4SI_V4SI:
38099 case VOID_FTYPE_PV2DI_V2DI_V2DI:
38100 case VOID_FTYPE_PDOUBLE_V2DF_QI:
38101 case VOID_FTYPE_PFLOAT_V4SF_QI:
38102 case VOID_FTYPE_PV8SI_V8DI_QI:
38103 case VOID_FTYPE_PV8HI_V8DI_QI:
38104 case VOID_FTYPE_PV16HI_V16SI_HI:
38105 case VOID_FTYPE_PV16QI_V8DI_QI:
38106 case VOID_FTYPE_PV16QI_V16SI_HI:
38107 case VOID_FTYPE_PV4SI_V4DI_QI:
38108 case VOID_FTYPE_PV4SI_V2DI_QI:
38109 case VOID_FTYPE_PV8HI_V4DI_QI:
38110 case VOID_FTYPE_PV8HI_V2DI_QI:
38111 case VOID_FTYPE_PV8HI_V8SI_QI:
38112 case VOID_FTYPE_PV8HI_V4SI_QI:
38113 case VOID_FTYPE_PV16QI_V4DI_QI:
38114 case VOID_FTYPE_PV16QI_V2DI_QI:
38115 case VOID_FTYPE_PV16QI_V8SI_QI:
38116 case VOID_FTYPE_PV16QI_V4SI_QI:
38117 case VOID_FTYPE_PV8HI_V8HI_QI:
38118 case VOID_FTYPE_PV16HI_V16HI_HI:
38119 case VOID_FTYPE_PV32HI_V32HI_SI:
38120 case VOID_FTYPE_PV16QI_V16QI_HI:
38121 case VOID_FTYPE_PV32QI_V32QI_SI:
38122 case VOID_FTYPE_PV64QI_V64QI_DI:
38123 case VOID_FTYPE_PV4DF_V4DF_QI:
38124 case VOID_FTYPE_PV2DF_V2DF_QI:
38125 case VOID_FTYPE_PV8SF_V8SF_QI:
38126 case VOID_FTYPE_PV4SF_V4SF_QI:
38127 nargs = 2;
38128 klass = store;
38129 /* Reserve memory operand for target. */
38130 memory = ARRAY_SIZE (args);
38131 break;
38132 case V4SF_FTYPE_PCV4SF_V4SF_QI:
38133 case V8SF_FTYPE_PCV8SF_V8SF_QI:
38134 case V16SF_FTYPE_PCV16SF_V16SF_HI:
38135 case V4SI_FTYPE_PCV4SI_V4SI_QI:
38136 case V8SI_FTYPE_PCV8SI_V8SI_QI:
38137 case V16SI_FTYPE_PCV16SI_V16SI_HI:
38138 case V2DF_FTYPE_PCV2DF_V2DF_QI:
38139 case V4DF_FTYPE_PCV4DF_V4DF_QI:
38140 case V8DF_FTYPE_PCV8DF_V8DF_QI:
38141 case V2DI_FTYPE_PCV2DI_V2DI_QI:
38142 case V4DI_FTYPE_PCV4DI_V4DI_QI:
38143 case V8DI_FTYPE_PCV8DI_V8DI_QI:
38144 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
38145 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
38146 case V8HI_FTYPE_PCV8HI_V8HI_QI:
38147 case V16HI_FTYPE_PCV16HI_V16HI_HI:
38148 case V32HI_FTYPE_PCV32HI_V32HI_SI:
38149 case V16QI_FTYPE_PCV16QI_V16QI_HI:
38150 case V32QI_FTYPE_PCV32QI_V32QI_SI:
38151 case V64QI_FTYPE_PCV64QI_V64QI_DI:
38152 nargs = 3;
38153 klass = load;
38154 memory = 0;
38155 switch (icode)
38157 /* These builtins and instructions require the memory
38158 to be properly aligned. */
38159 case CODE_FOR_avx512f_loadv16sf_mask:
38160 case CODE_FOR_avx512f_loadv16si_mask:
38161 case CODE_FOR_avx512f_loadv8df_mask:
38162 case CODE_FOR_avx512f_loadv8di_mask:
38163 case CODE_FOR_avx512vl_loadv8sf_mask:
38164 case CODE_FOR_avx512vl_loadv8si_mask:
38165 case CODE_FOR_avx512vl_loadv4df_mask:
38166 case CODE_FOR_avx512vl_loadv4di_mask:
38167 case CODE_FOR_avx512vl_loadv4sf_mask:
38168 case CODE_FOR_avx512vl_loadv4si_mask:
38169 case CODE_FOR_avx512vl_loadv2df_mask:
38170 case CODE_FOR_avx512vl_loadv2di_mask:
38171 case CODE_FOR_avx512bw_loadv64qi_mask:
38172 case CODE_FOR_avx512vl_loadv32qi_mask:
38173 case CODE_FOR_avx512vl_loadv16qi_mask:
38174 case CODE_FOR_avx512bw_loadv32hi_mask:
38175 case CODE_FOR_avx512vl_loadv16hi_mask:
38176 case CODE_FOR_avx512vl_loadv8hi_mask:
38177 aligned_mem = true;
38178 break;
38179 default:
38180 break;
38182 break;
38183 case VOID_FTYPE_UINT_UINT_UINT:
38184 case VOID_FTYPE_UINT64_UINT_UINT:
38185 case UCHAR_FTYPE_UINT_UINT_UINT:
38186 case UCHAR_FTYPE_UINT64_UINT_UINT:
38187 nargs = 3;
38188 klass = load;
38189 memory = ARRAY_SIZE (args);
38190 last_arg_constant = true;
38191 break;
38192 default:
38193 gcc_unreachable ();
38196 gcc_assert (nargs <= ARRAY_SIZE (args));
38198 if (klass == store)
38200 arg = CALL_EXPR_ARG (exp, 0);
38201 op = expand_normal (arg);
38202 gcc_assert (target == 0);
38203 if (memory)
38205 op = ix86_zero_extend_to_Pmode (op);
38206 target = gen_rtx_MEM (tmode, op);
38207 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38208 on it. Try to improve it using get_pointer_alignment,
38209 and if the special builtin is one that requires strict
38210 mode alignment, also from its GET_MODE_ALIGNMENT.
38211 Failure to do so could lead to ix86_legitimate_combined_insn
38212 rejecting all changes to such insns. */
38213 unsigned int align = get_pointer_alignment (arg);
38214 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38215 align = GET_MODE_ALIGNMENT (tmode);
38216 if (MEM_ALIGN (target) < align)
38217 set_mem_align (target, align);
38219 else
38220 target = force_reg (tmode, op);
38221 arg_adjust = 1;
38223 else
38225 arg_adjust = 0;
38226 if (optimize
38227 || target == 0
38228 || !register_operand (target, tmode)
38229 || GET_MODE (target) != tmode)
38230 target = gen_reg_rtx (tmode);
38233 for (i = 0; i < nargs; i++)
38235 machine_mode mode = insn_p->operand[i + 1].mode;
38236 bool match;
38238 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38239 op = expand_normal (arg);
38240 match = insn_p->operand[i + 1].predicate (op, mode);
38242 if (last_arg_constant && (i + 1) == nargs)
38244 if (!match)
38246 if (icode == CODE_FOR_lwp_lwpvalsi3
38247 || icode == CODE_FOR_lwp_lwpinssi3
38248 || icode == CODE_FOR_lwp_lwpvaldi3
38249 || icode == CODE_FOR_lwp_lwpinsdi3)
38250 error ("the last argument must be a 32-bit immediate");
38251 else
38252 error ("the last argument must be an 8-bit immediate");
38253 return const0_rtx;
38256 else
38258 if (i == memory)
38260 /* This must be the memory operand. */
38261 op = ix86_zero_extend_to_Pmode (op);
38262 op = gen_rtx_MEM (mode, op);
38263 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38264 on it. Try to improve it using get_pointer_alignment,
38265 and if the special builtin is one that requires strict
38266 mode alignment, also from its GET_MODE_ALIGNMENT.
38267 Failure to do so could lead to ix86_legitimate_combined_insn
38268 rejecting all changes to such insns. */
38269 unsigned int align = get_pointer_alignment (arg);
38270 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38271 align = GET_MODE_ALIGNMENT (mode);
38272 if (MEM_ALIGN (op) < align)
38273 set_mem_align (op, align);
38275 else
38277 /* This must be a register. */
38278 if (VECTOR_MODE_P (mode))
38279 op = safe_vector_operand (op, mode);
38281 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38282 op = copy_to_mode_reg (mode, op);
38283 else
38285 op = copy_to_reg (op);
38286 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38291 args[i].op = op;
38292 args[i].mode = mode;
38295 switch (nargs)
38297 case 0:
38298 pat = GEN_FCN (icode) (target);
38299 break;
38300 case 1:
38301 pat = GEN_FCN (icode) (target, args[0].op);
38302 break;
38303 case 2:
38304 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38305 break;
38306 case 3:
38307 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38308 break;
38309 default:
38310 gcc_unreachable ();
38313 if (! pat)
38314 return 0;
38315 emit_insn (pat);
38316 return klass == store ? 0 : target;
38319 /* Return the integer constant in ARG. Constrain it to be in the range
38320 of the subparts of VEC_TYPE; issue an error if not. */
38322 static int
38323 get_element_number (tree vec_type, tree arg)
38325 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
38327 if (!tree_fits_uhwi_p (arg)
38328 || (elt = tree_to_uhwi (arg), elt > max))
38330 error ("selector must be an integer constant in the range 0..%wi", max);
38331 return 0;
38334 return elt;
38337 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38338 ix86_expand_vector_init. We DO have language-level syntax for this, in
38339 the form of (type){ init-list }. Except that since we can't place emms
38340 instructions from inside the compiler, we can't allow the use of MMX
38341 registers unless the user explicitly asks for it. So we do *not* define
38342 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38343 we have builtins invoked by mmintrin.h that give us license to emit
38344 these sorts of instructions. */
38346 static rtx
38347 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38349 machine_mode tmode = TYPE_MODE (type);
38350 machine_mode inner_mode = GET_MODE_INNER (tmode);
38351 int i, n_elt = GET_MODE_NUNITS (tmode);
38352 rtvec v = rtvec_alloc (n_elt);
38354 gcc_assert (VECTOR_MODE_P (tmode));
38355 gcc_assert (call_expr_nargs (exp) == n_elt);
38357 for (i = 0; i < n_elt; ++i)
38359 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38360 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38363 if (!target || !register_operand (target, tmode))
38364 target = gen_reg_rtx (tmode);
38366 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38367 return target;
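/* Illustrative note (not part of the original source): mmintrin.h wraps
   these builtins, e.g. _mm_set_pi32 is defined roughly as

     extern __inline __m64
     _mm_set_pi32 (int __i1, int __i0)
     {
       return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
     }

   so the call expression expanded above supplies one argument per vector
   element, matching the call_expr_nargs assertion.  */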
38370 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38371 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38372 had a language-level syntax for referencing vector elements. */
38374 static rtx
38375 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38377 machine_mode tmode, mode0;
38378 tree arg0, arg1;
38379 int elt;
38380 rtx op0;
38382 arg0 = CALL_EXPR_ARG (exp, 0);
38383 arg1 = CALL_EXPR_ARG (exp, 1);
38385 op0 = expand_normal (arg0);
38386 elt = get_element_number (TREE_TYPE (arg0), arg1);
38388 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38389 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38390 gcc_assert (VECTOR_MODE_P (mode0));
38392 op0 = force_reg (mode0, op0);
38394 if (optimize || !target || !register_operand (target, tmode))
38395 target = gen_reg_rtx (tmode);
38397 ix86_expand_vector_extract (true, target, op0, elt);
38399 return target;
38402 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38403 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38404 a language-level syntax for referencing vector elements. */
38406 static rtx
38407 ix86_expand_vec_set_builtin (tree exp)
38409 machine_mode tmode, mode1;
38410 tree arg0, arg1, arg2;
38411 int elt;
38412 rtx op0, op1, target;
38414 arg0 = CALL_EXPR_ARG (exp, 0);
38415 arg1 = CALL_EXPR_ARG (exp, 1);
38416 arg2 = CALL_EXPR_ARG (exp, 2);
38418 tmode = TYPE_MODE (TREE_TYPE (arg0));
38419 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38420 gcc_assert (VECTOR_MODE_P (tmode));
38422 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38423 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38424 elt = get_element_number (TREE_TYPE (arg0), arg2);
38426 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38427 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38429 op0 = force_reg (tmode, op0);
38430 op1 = force_reg (mode1, op1);
38432 /* OP0 is the source of these builtin functions and shouldn't be
38433 modified. Create a copy, use it and return it as target. */
38434 target = gen_reg_rtx (tmode);
38435 emit_move_insn (target, op0);
38436 ix86_expand_vector_set (true, target, op1, elt);
38438 return target;
38441 /* Emit conditional move of SRC to DST with condition
38442 OP1 CODE OP2. */
38443 static void
38444 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38446 rtx t;
38448 if (TARGET_CMOVE)
38450 t = ix86_expand_compare (code, op1, op2);
38451 emit_insn (gen_rtx_SET (VOIDmode, dst,
38452 gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38453 src, dst)));
38455 else
38457 rtx nomove = gen_label_rtx ();
38458 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38459 const0_rtx, GET_MODE (op1), 1, nomove);
38460 emit_move_insn (dst, src);
38461 emit_label (nomove);
38465 /* Choose max of DST and SRC and put it to DST. */
38466 static void
38467 ix86_emit_move_max (rtx dst, rtx src)
38469 ix86_emit_cmove (dst, src, LTU, dst, src);
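/* Note (added for clarity): with LTU and the operand order (DST, SRC), the
   conditional move above replaces DST with SRC exactly when DST < SRC as
   unsigned values, i.e. DST becomes the unsigned maximum of the two.  The
   MPX bound computations below rely on this.  */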
38472 /* Expand an expression EXP that calls a built-in function,
38473 with result going to TARGET if that's convenient
38474 (and in mode MODE if that's convenient).
38475 SUBTARGET may be used as the target for computing one of EXP's operands.
38476 IGNORE is nonzero if the value is to be ignored. */
38478 static rtx
38479 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38480 machine_mode mode, int ignore)
38482 const struct builtin_description *d;
38483 size_t i;
38484 enum insn_code icode;
38485 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38486 tree arg0, arg1, arg2, arg3, arg4;
38487 rtx op0, op1, op2, op3, op4, pat, insn;
38488 machine_mode mode0, mode1, mode2, mode3, mode4;
38489 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38491 /* For CPU builtins that can be folded, fold first and expand the fold. */
38492 switch (fcode)
38494 case IX86_BUILTIN_CPU_INIT:
38496 /* Make it call __cpu_indicator_init in libgcc. */
38497 tree call_expr, fndecl, type;
38498 type = build_function_type_list (integer_type_node, NULL_TREE);
38499 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38500 call_expr = build_call_expr (fndecl, 0);
38501 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38503 case IX86_BUILTIN_CPU_IS:
38504 case IX86_BUILTIN_CPU_SUPPORTS:
38506 tree arg0 = CALL_EXPR_ARG (exp, 0);
38507 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38508 gcc_assert (fold_expr != NULL_TREE);
38509 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
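/* Illustrative note (added): these cases implement __builtin_cpu_init,
   __builtin_cpu_is and __builtin_cpu_supports.  Typical user code is

     __builtin_cpu_init ();
     if (__builtin_cpu_supports ("avx2"))
       use_avx2_path ();

   (use_avx2_path is a placeholder).  The is/supports queries fold into
   reads of the __cpu_model data that __cpu_indicator_init fills in.  */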
38513 /* Determine whether the builtin function is available under the current ISA.
38514 Originally the builtin was not created if it wasn't applicable to the
38515 current ISA based on the command line switches. With function specific
38516 options, we need to check in the context of the function making the call
38517 whether it is supported. */
38518 if (ix86_builtins_isa[fcode].isa
38519 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38521 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38522 NULL, (enum fpmath_unit) 0, false);
38524 if (!opts)
38525 error ("%qE needs unknown isa option", fndecl);
38526 else
38528 gcc_assert (opts != NULL);
38529 error ("%qE needs isa option %s", fndecl, opts);
38530 free (opts);
38532 return const0_rtx;
38535 switch (fcode)
38537 case IX86_BUILTIN_BNDMK:
38538 if (!target
38539 || GET_MODE (target) != BNDmode
38540 || !register_operand (target, BNDmode))
38541 target = gen_reg_rtx (BNDmode);
38543 arg0 = CALL_EXPR_ARG (exp, 0);
38544 arg1 = CALL_EXPR_ARG (exp, 1);
38546 op0 = expand_normal (arg0);
38547 op1 = expand_normal (arg1);
38549 if (!register_operand (op0, Pmode))
38550 op0 = ix86_zero_extend_to_Pmode (op0);
38551 if (!register_operand (op1, Pmode))
38552 op1 = ix86_zero_extend_to_Pmode (op1);
38554 /* Builtin arg1 is the size of the block, but instruction op1 should
38555 be (size - 1). */
38556 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38557 NULL_RTX, 1, OPTAB_DIRECT);
38559 emit_insn (BNDmode == BND64mode
38560 ? gen_bnd64_mk (target, op0, op1)
38561 : gen_bnd32_mk (target, op0, op1));
38562 return target;
38564 case IX86_BUILTIN_BNDSTX:
38565 arg0 = CALL_EXPR_ARG (exp, 0);
38566 arg1 = CALL_EXPR_ARG (exp, 1);
38567 arg2 = CALL_EXPR_ARG (exp, 2);
38569 op0 = expand_normal (arg0);
38570 op1 = expand_normal (arg1);
38571 op2 = expand_normal (arg2);
38573 if (!register_operand (op0, Pmode))
38574 op0 = ix86_zero_extend_to_Pmode (op0);
38575 if (!register_operand (op1, BNDmode))
38576 op1 = copy_to_mode_reg (BNDmode, op1);
38577 if (!register_operand (op2, Pmode))
38578 op2 = ix86_zero_extend_to_Pmode (op2);
38580 emit_insn (BNDmode == BND64mode
38581 ? gen_bnd64_stx (op2, op0, op1)
38582 : gen_bnd32_stx (op2, op0, op1));
38583 return 0;
38585 case IX86_BUILTIN_BNDLDX:
38586 if (!target
38587 || GET_MODE (target) != BNDmode
38588 || !register_operand (target, BNDmode))
38589 target = gen_reg_rtx (BNDmode);
38591 arg0 = CALL_EXPR_ARG (exp, 0);
38592 arg1 = CALL_EXPR_ARG (exp, 1);
38594 op0 = expand_normal (arg0);
38595 op1 = expand_normal (arg1);
38597 if (!register_operand (op0, Pmode))
38598 op0 = ix86_zero_extend_to_Pmode (op0);
38599 if (!register_operand (op1, Pmode))
38600 op1 = ix86_zero_extend_to_Pmode (op1);
38602 emit_insn (BNDmode == BND64mode
38603 ? gen_bnd64_ldx (target, op0, op1)
38604 : gen_bnd32_ldx (target, op0, op1));
38605 return target;
38607 case IX86_BUILTIN_BNDCL:
38608 arg0 = CALL_EXPR_ARG (exp, 0);
38609 arg1 = CALL_EXPR_ARG (exp, 1);
38611 op0 = expand_normal (arg0);
38612 op1 = expand_normal (arg1);
38614 if (!register_operand (op0, Pmode))
38615 op0 = ix86_zero_extend_to_Pmode (op0);
38616 if (!register_operand (op1, BNDmode))
38617 op1 = copy_to_mode_reg (BNDmode, op1);
38619 emit_insn (BNDmode == BND64mode
38620 ? gen_bnd64_cl (op1, op0)
38621 : gen_bnd32_cl (op1, op0));
38622 return 0;
38624 case IX86_BUILTIN_BNDCU:
38625 arg0 = CALL_EXPR_ARG (exp, 0);
38626 arg1 = CALL_EXPR_ARG (exp, 1);
38628 op0 = expand_normal (arg0);
38629 op1 = expand_normal (arg1);
38631 if (!register_operand (op0, Pmode))
38632 op0 = ix86_zero_extend_to_Pmode (op0);
38633 if (!register_operand (op1, BNDmode))
38634 op1 = copy_to_mode_reg (BNDmode, op1);
38636 emit_insn (BNDmode == BND64mode
38637 ? gen_bnd64_cu (op1, op0)
38638 : gen_bnd32_cu (op1, op0));
38639 return 0;
38641 case IX86_BUILTIN_BNDRET:
38642 arg0 = CALL_EXPR_ARG (exp, 0);
38643 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38644 target = chkp_get_rtl_bounds (arg0);
38646 /* If no bounds were specified for the returned value,
38647 then use INIT bounds.  This usually happens when
38648 some built-in function is expanded. */
38649 if (!target)
38651 rtx t1 = gen_reg_rtx (Pmode);
38652 rtx t2 = gen_reg_rtx (Pmode);
38653 target = gen_reg_rtx (BNDmode);
38654 emit_move_insn (t1, const0_rtx);
38655 emit_move_insn (t2, constm1_rtx);
38656 emit_insn (BNDmode == BND64mode
38657 ? gen_bnd64_mk (target, t1, t2)
38658 : gen_bnd32_mk (target, t1, t2));
38661 gcc_assert (target && REG_P (target));
38662 return target;
38664 case IX86_BUILTIN_BNDNARROW:
38666 rtx m1, m1h1, m1h2, lb, ub, t1;
38668 /* Return value and lb. */
38669 arg0 = CALL_EXPR_ARG (exp, 0);
38670 /* Bounds. */
38671 arg1 = CALL_EXPR_ARG (exp, 1);
38672 /* Size. */
38673 arg2 = CALL_EXPR_ARG (exp, 2);
38675 lb = expand_normal (arg0);
38676 op1 = expand_normal (arg1);
38677 op2 = expand_normal (arg2);
38679 /* Size was passed but we need to use (size - 1) as for bndmk. */
38680 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38681 NULL_RTX, 1, OPTAB_DIRECT);
38683 /* Add LB to size and invert to get UB. */
38684 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38685 op2, 1, OPTAB_DIRECT);
38686 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
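/* Worked example (added): for lb = 0x1000 and size = 0x10 the code above
   computes op2 = 0x100f (lb + size - 1) and ub = ~0x100f, the one's
   complement form in which the upper bound is kept below.  */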
38688 if (!register_operand (lb, Pmode))
38689 lb = ix86_zero_extend_to_Pmode (lb);
38690 if (!register_operand (ub, Pmode))
38691 ub = ix86_zero_extend_to_Pmode (ub);
38693 /* We need to move bounds to memory before any computations. */
38694 if (MEM_P (op1))
38695 m1 = op1;
38696 else
38698 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38699 emit_move_insn (m1, op1);
38702 /* Generate mem expression to be used for access to LB and UB. */
38703 m1h1 = adjust_address (m1, Pmode, 0);
38704 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38706 t1 = gen_reg_rtx (Pmode);
38708 /* Compute LB. */
38709 emit_move_insn (t1, m1h1);
38710 ix86_emit_move_max (t1, lb);
38711 emit_move_insn (m1h1, t1);
38713 /* Compute UB. UB is stored in 1's complement form. Therefore
38714 we also use max here. */
38715 emit_move_insn (t1, m1h2);
38716 ix86_emit_move_max (t1, ub);
38717 emit_move_insn (m1h2, t1);
38719 op2 = gen_reg_rtx (BNDmode);
38720 emit_move_insn (op2, m1);
38722 return chkp_join_splitted_slot (lb, op2);
38725 case IX86_BUILTIN_BNDINT:
38727 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38729 if (!target
38730 || GET_MODE (target) != BNDmode
38731 || !register_operand (target, BNDmode))
38732 target = gen_reg_rtx (BNDmode);
38734 arg0 = CALL_EXPR_ARG (exp, 0);
38735 arg1 = CALL_EXPR_ARG (exp, 1);
38737 op0 = expand_normal (arg0);
38738 op1 = expand_normal (arg1);
38740 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38741 rh1 = adjust_address (res, Pmode, 0);
38742 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38744 /* Put first bounds to temporaries. */
38745 lb1 = gen_reg_rtx (Pmode);
38746 ub1 = gen_reg_rtx (Pmode);
38747 if (MEM_P (op0))
38749 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38750 emit_move_insn (ub1, adjust_address (op0, Pmode,
38751 GET_MODE_SIZE (Pmode)));
38753 else
38755 emit_move_insn (res, op0);
38756 emit_move_insn (lb1, rh1);
38757 emit_move_insn (ub1, rh2);
38760 /* Put second bounds to temporaries. */
38761 lb2 = gen_reg_rtx (Pmode);
38762 ub2 = gen_reg_rtx (Pmode);
38763 if (MEM_P (op1))
38765 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38766 emit_move_insn (ub2, adjust_address (op1, Pmode,
38767 GET_MODE_SIZE (Pmode)));
38769 else
38771 emit_move_insn (res, op1);
38772 emit_move_insn (lb2, rh1);
38773 emit_move_insn (ub2, rh2);
38776 /* Compute LB. */
38777 ix86_emit_move_max (lb1, lb2);
38778 emit_move_insn (rh1, lb1);
38780 /* Compute UB. UB is stored in 1's complement form. Therefore
38781 we also use max here. */
38782 ix86_emit_move_max (ub1, ub2);
38783 emit_move_insn (rh2, ub1);
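/* Note (added): because upper bounds are kept in one's complement, taking
   the unsigned maximum of the two stored values selects the smaller real
   upper bound, so the result built here is the intersection
   [max (lb1, lb2), min (ub1, ub2)].  */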
38785 emit_move_insn (target, res);
38787 return target;
38790 case IX86_BUILTIN_SIZEOF:
38792 tree name;
38793 rtx symbol;
38795 if (!target
38796 || GET_MODE (target) != Pmode
38797 || !register_operand (target, Pmode))
38798 target = gen_reg_rtx (Pmode);
38800 arg0 = CALL_EXPR_ARG (exp, 0);
38801 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38803 name = DECL_ASSEMBLER_NAME (arg0);
38804 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38806 emit_insn (Pmode == SImode
38807 ? gen_move_size_reloc_si (target, symbol)
38808 : gen_move_size_reloc_di (target, symbol));
38810 return target;
38813 case IX86_BUILTIN_BNDLOWER:
38815 rtx mem, hmem;
38817 if (!target
38818 || GET_MODE (target) != Pmode
38819 || !register_operand (target, Pmode))
38820 target = gen_reg_rtx (Pmode);
38822 arg0 = CALL_EXPR_ARG (exp, 0);
38823 op0 = expand_normal (arg0);
38825 /* We need to move bounds to memory first. */
38826 if (MEM_P (op0))
38827 mem = op0;
38828 else
38830 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38831 emit_move_insn (mem, op0);
38834 /* Generate mem expression to access LB and load it. */
38835 hmem = adjust_address (mem, Pmode, 0);
38836 emit_move_insn (target, hmem);
38838 return target;
38841 case IX86_BUILTIN_BNDUPPER:
38843 rtx mem, hmem, res;
38845 if (!target
38846 || GET_MODE (target) != Pmode
38847 || !register_operand (target, Pmode))
38848 target = gen_reg_rtx (Pmode);
38850 arg0 = CALL_EXPR_ARG (exp, 0);
38851 op0 = expand_normal (arg0);
38853 /* We need to move bounds to memory first. */
38854 if (MEM_P (op0))
38855 mem = op0;
38856 else
38858 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38859 emit_move_insn (mem, op0);
38862 /* Generate mem expression to access UB. */
38863 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
38865 /* We need to invert all bits of UB. */
38866 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
38868 if (res != target)
38869 emit_move_insn (target, res);
38871 return target;
38874 case IX86_BUILTIN_MASKMOVQ:
38875 case IX86_BUILTIN_MASKMOVDQU:
38876 icode = (fcode == IX86_BUILTIN_MASKMOVQ
38877 ? CODE_FOR_mmx_maskmovq
38878 : CODE_FOR_sse2_maskmovdqu);
38879 /* Note the arg order is different from the operand order. */
38880 arg1 = CALL_EXPR_ARG (exp, 0);
38881 arg2 = CALL_EXPR_ARG (exp, 1);
38882 arg0 = CALL_EXPR_ARG (exp, 2);
38883 op0 = expand_normal (arg0);
38884 op1 = expand_normal (arg1);
38885 op2 = expand_normal (arg2);
38886 mode0 = insn_data[icode].operand[0].mode;
38887 mode1 = insn_data[icode].operand[1].mode;
38888 mode2 = insn_data[icode].operand[2].mode;
38890 op0 = ix86_zero_extend_to_Pmode (op0);
38891 op0 = gen_rtx_MEM (mode1, op0);
38893 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38894 op0 = copy_to_mode_reg (mode0, op0);
38895 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38896 op1 = copy_to_mode_reg (mode1, op1);
38897 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38898 op2 = copy_to_mode_reg (mode2, op2);
38899 pat = GEN_FCN (icode) (op0, op1, op2);
38900 if (! pat)
38901 return 0;
38902 emit_insn (pat);
38903 return 0;
38905 case IX86_BUILTIN_LDMXCSR:
38906 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
38907 target = assign_386_stack_local (SImode, SLOT_TEMP);
38908 emit_move_insn (target, op0);
38909 emit_insn (gen_sse_ldmxcsr (target));
38910 return 0;
38912 case IX86_BUILTIN_STMXCSR:
38913 target = assign_386_stack_local (SImode, SLOT_TEMP);
38914 emit_insn (gen_sse_stmxcsr (target));
38915 return copy_to_mode_reg (SImode, target);
38917 case IX86_BUILTIN_CLFLUSH:
38918 arg0 = CALL_EXPR_ARG (exp, 0);
38919 op0 = expand_normal (arg0);
38920 icode = CODE_FOR_sse2_clflush;
38921 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38922 op0 = ix86_zero_extend_to_Pmode (op0);
38924 emit_insn (gen_sse2_clflush (op0));
38925 return 0;
38927 case IX86_BUILTIN_CLWB:
38928 arg0 = CALL_EXPR_ARG (exp, 0);
38929 op0 = expand_normal (arg0);
38930 icode = CODE_FOR_clwb;
38931 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38932 op0 = ix86_zero_extend_to_Pmode (op0);
38934 emit_insn (gen_clwb (op0));
38935 return 0;
38937 case IX86_BUILTIN_CLFLUSHOPT:
38938 arg0 = CALL_EXPR_ARG (exp, 0);
38939 op0 = expand_normal (arg0);
38940 icode = CODE_FOR_clflushopt;
38941 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38942 op0 = ix86_zero_extend_to_Pmode (op0);
38944 emit_insn (gen_clflushopt (op0));
38945 return 0;
38947 case IX86_BUILTIN_MONITOR:
38948 arg0 = CALL_EXPR_ARG (exp, 0);
38949 arg1 = CALL_EXPR_ARG (exp, 1);
38950 arg2 = CALL_EXPR_ARG (exp, 2);
38951 op0 = expand_normal (arg0);
38952 op1 = expand_normal (arg1);
38953 op2 = expand_normal (arg2);
38954 if (!REG_P (op0))
38955 op0 = ix86_zero_extend_to_Pmode (op0);
38956 if (!REG_P (op1))
38957 op1 = copy_to_mode_reg (SImode, op1);
38958 if (!REG_P (op2))
38959 op2 = copy_to_mode_reg (SImode, op2);
38960 emit_insn (ix86_gen_monitor (op0, op1, op2));
38961 return 0;
38963 case IX86_BUILTIN_MWAIT:
38964 arg0 = CALL_EXPR_ARG (exp, 0);
38965 arg1 = CALL_EXPR_ARG (exp, 1);
38966 op0 = expand_normal (arg0);
38967 op1 = expand_normal (arg1);
38968 if (!REG_P (op0))
38969 op0 = copy_to_mode_reg (SImode, op0);
38970 if (!REG_P (op1))
38971 op1 = copy_to_mode_reg (SImode, op1);
38972 emit_insn (gen_sse3_mwait (op0, op1));
38973 return 0;
38975 case IX86_BUILTIN_VEC_INIT_V2SI:
38976 case IX86_BUILTIN_VEC_INIT_V4HI:
38977 case IX86_BUILTIN_VEC_INIT_V8QI:
38978 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
38980 case IX86_BUILTIN_VEC_EXT_V2DF:
38981 case IX86_BUILTIN_VEC_EXT_V2DI:
38982 case IX86_BUILTIN_VEC_EXT_V4SF:
38983 case IX86_BUILTIN_VEC_EXT_V4SI:
38984 case IX86_BUILTIN_VEC_EXT_V8HI:
38985 case IX86_BUILTIN_VEC_EXT_V2SI:
38986 case IX86_BUILTIN_VEC_EXT_V4HI:
38987 case IX86_BUILTIN_VEC_EXT_V16QI:
38988 return ix86_expand_vec_ext_builtin (exp, target);
38990 case IX86_BUILTIN_VEC_SET_V2DI:
38991 case IX86_BUILTIN_VEC_SET_V4SF:
38992 case IX86_BUILTIN_VEC_SET_V4SI:
38993 case IX86_BUILTIN_VEC_SET_V8HI:
38994 case IX86_BUILTIN_VEC_SET_V4HI:
38995 case IX86_BUILTIN_VEC_SET_V16QI:
38996 return ix86_expand_vec_set_builtin (exp);
38998 case IX86_BUILTIN_INFQ:
38999 case IX86_BUILTIN_HUGE_VALQ:
39001 REAL_VALUE_TYPE inf;
39002 rtx tmp;
39004 real_inf (&inf);
39005 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
39007 tmp = validize_mem (force_const_mem (mode, tmp));
39009 if (target == 0)
39010 target = gen_reg_rtx (mode);
39012 emit_move_insn (target, tmp);
39013 return target;
39016 case IX86_BUILTIN_RDPMC:
39017 case IX86_BUILTIN_RDTSC:
39018 case IX86_BUILTIN_RDTSCP:
39020 op0 = gen_reg_rtx (DImode);
39021 op1 = gen_reg_rtx (DImode);
39023 if (fcode == IX86_BUILTIN_RDPMC)
39025 arg0 = CALL_EXPR_ARG (exp, 0);
39026 op2 = expand_normal (arg0);
39027 if (!register_operand (op2, SImode))
39028 op2 = copy_to_mode_reg (SImode, op2);
39030 insn = (TARGET_64BIT
39031 ? gen_rdpmc_rex64 (op0, op1, op2)
39032 : gen_rdpmc (op0, op2));
39033 emit_insn (insn);
39035 else if (fcode == IX86_BUILTIN_RDTSC)
39037 insn = (TARGET_64BIT
39038 ? gen_rdtsc_rex64 (op0, op1)
39039 : gen_rdtsc (op0));
39040 emit_insn (insn);
39042 else
39044 op2 = gen_reg_rtx (SImode);
39046 insn = (TARGET_64BIT
39047 ? gen_rdtscp_rex64 (op0, op1, op2)
39048 : gen_rdtscp (op0, op2));
39049 emit_insn (insn);
39051 arg0 = CALL_EXPR_ARG (exp, 0);
39052 op4 = expand_normal (arg0);
39053 if (!address_operand (op4, VOIDmode))
39055 op4 = convert_memory_address (Pmode, op4);
39056 op4 = copy_addr_to_reg (op4);
39058 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
39061 if (target == 0)
39063 /* mode is VOIDmode if __builtin_rd* has been called
39064 without lhs. */
39065 if (mode == VOIDmode)
39066 return target;
39067 target = gen_reg_rtx (mode);
39070 if (TARGET_64BIT)
39072 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
39073 op1, 1, OPTAB_DIRECT);
39074 op0 = expand_simple_binop (DImode, IOR, op0, op1,
39075 op0, 1, OPTAB_DIRECT);
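/* Note (added): on 64-bit targets the rex64 patterns return the counter as
   two DImode halves (low part in op0, high part in op1); the shift and IOR
   above combine them into the full 64-bit value before it is copied to the
   target.  */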
39078 emit_move_insn (target, op0);
39079 return target;
39081 case IX86_BUILTIN_FXSAVE:
39082 case IX86_BUILTIN_FXRSTOR:
39083 case IX86_BUILTIN_FXSAVE64:
39084 case IX86_BUILTIN_FXRSTOR64:
39085 case IX86_BUILTIN_FNSTENV:
39086 case IX86_BUILTIN_FLDENV:
39087 mode0 = BLKmode;
39088 switch (fcode)
39090 case IX86_BUILTIN_FXSAVE:
39091 icode = CODE_FOR_fxsave;
39092 break;
39093 case IX86_BUILTIN_FXRSTOR:
39094 icode = CODE_FOR_fxrstor;
39095 break;
39096 case IX86_BUILTIN_FXSAVE64:
39097 icode = CODE_FOR_fxsave64;
39098 break;
39099 case IX86_BUILTIN_FXRSTOR64:
39100 icode = CODE_FOR_fxrstor64;
39101 break;
39102 case IX86_BUILTIN_FNSTENV:
39103 icode = CODE_FOR_fnstenv;
39104 break;
39105 case IX86_BUILTIN_FLDENV:
39106 icode = CODE_FOR_fldenv;
39107 break;
39108 default:
39109 gcc_unreachable ();
39112 arg0 = CALL_EXPR_ARG (exp, 0);
39113 op0 = expand_normal (arg0);
39115 if (!address_operand (op0, VOIDmode))
39117 op0 = convert_memory_address (Pmode, op0);
39118 op0 = copy_addr_to_reg (op0);
39120 op0 = gen_rtx_MEM (mode0, op0);
39122 pat = GEN_FCN (icode) (op0);
39123 if (pat)
39124 emit_insn (pat);
39125 return 0;
39127 case IX86_BUILTIN_XSAVE:
39128 case IX86_BUILTIN_XRSTOR:
39129 case IX86_BUILTIN_XSAVE64:
39130 case IX86_BUILTIN_XRSTOR64:
39131 case IX86_BUILTIN_XSAVEOPT:
39132 case IX86_BUILTIN_XSAVEOPT64:
39133 case IX86_BUILTIN_XSAVES:
39134 case IX86_BUILTIN_XRSTORS:
39135 case IX86_BUILTIN_XSAVES64:
39136 case IX86_BUILTIN_XRSTORS64:
39137 case IX86_BUILTIN_XSAVEC:
39138 case IX86_BUILTIN_XSAVEC64:
39139 arg0 = CALL_EXPR_ARG (exp, 0);
39140 arg1 = CALL_EXPR_ARG (exp, 1);
39141 op0 = expand_normal (arg0);
39142 op1 = expand_normal (arg1);
39144 if (!address_operand (op0, VOIDmode))
39146 op0 = convert_memory_address (Pmode, op0);
39147 op0 = copy_addr_to_reg (op0);
39149 op0 = gen_rtx_MEM (BLKmode, op0);
39151 op1 = force_reg (DImode, op1);
39153 if (TARGET_64BIT)
39155 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
39156 NULL, 1, OPTAB_DIRECT);
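/* Note (added): the rex64 patterns take the 64-bit XSAVE state mask as an
   EDX:EAX style pair, so op2 receives the high 32 bits here and op1 is
   truncated to the low 32 bits further below.  */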
39157 switch (fcode)
39159 case IX86_BUILTIN_XSAVE:
39160 icode = CODE_FOR_xsave_rex64;
39161 break;
39162 case IX86_BUILTIN_XRSTOR:
39163 icode = CODE_FOR_xrstor_rex64;
39164 break;
39165 case IX86_BUILTIN_XSAVE64:
39166 icode = CODE_FOR_xsave64;
39167 break;
39168 case IX86_BUILTIN_XRSTOR64:
39169 icode = CODE_FOR_xrstor64;
39170 break;
39171 case IX86_BUILTIN_XSAVEOPT:
39172 icode = CODE_FOR_xsaveopt_rex64;
39173 break;
39174 case IX86_BUILTIN_XSAVEOPT64:
39175 icode = CODE_FOR_xsaveopt64;
39176 break;
39177 case IX86_BUILTIN_XSAVES:
39178 icode = CODE_FOR_xsaves_rex64;
39179 break;
39180 case IX86_BUILTIN_XRSTORS:
39181 icode = CODE_FOR_xrstors_rex64;
39182 break;
39183 case IX86_BUILTIN_XSAVES64:
39184 icode = CODE_FOR_xsaves64;
39185 break;
39186 case IX86_BUILTIN_XRSTORS64:
39187 icode = CODE_FOR_xrstors64;
39188 break;
39189 case IX86_BUILTIN_XSAVEC:
39190 icode = CODE_FOR_xsavec_rex64;
39191 break;
39192 case IX86_BUILTIN_XSAVEC64:
39193 icode = CODE_FOR_xsavec64;
39194 break;
39195 default:
39196 gcc_unreachable ();
39199 op2 = gen_lowpart (SImode, op2);
39200 op1 = gen_lowpart (SImode, op1);
39201 pat = GEN_FCN (icode) (op0, op1, op2);
39203 else
39205 switch (fcode)
39207 case IX86_BUILTIN_XSAVE:
39208 icode = CODE_FOR_xsave;
39209 break;
39210 case IX86_BUILTIN_XRSTOR:
39211 icode = CODE_FOR_xrstor;
39212 break;
39213 case IX86_BUILTIN_XSAVEOPT:
39214 icode = CODE_FOR_xsaveopt;
39215 break;
39216 case IX86_BUILTIN_XSAVES:
39217 icode = CODE_FOR_xsaves;
39218 break;
39219 case IX86_BUILTIN_XRSTORS:
39220 icode = CODE_FOR_xrstors;
39221 break;
39222 case IX86_BUILTIN_XSAVEC:
39223 icode = CODE_FOR_xsavec;
39224 break;
39225 default:
39226 gcc_unreachable ();
39228 pat = GEN_FCN (icode) (op0, op1);
39231 if (pat)
39232 emit_insn (pat);
39233 return 0;
39235 case IX86_BUILTIN_LLWPCB:
39236 arg0 = CALL_EXPR_ARG (exp, 0);
39237 op0 = expand_normal (arg0);
39238 icode = CODE_FOR_lwp_llwpcb;
39239 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39240 op0 = ix86_zero_extend_to_Pmode (op0);
39241 emit_insn (gen_lwp_llwpcb (op0));
39242 return 0;
39244 case IX86_BUILTIN_SLWPCB:
39245 icode = CODE_FOR_lwp_slwpcb;
39246 if (!target
39247 || !insn_data[icode].operand[0].predicate (target, Pmode))
39248 target = gen_reg_rtx (Pmode);
39249 emit_insn (gen_lwp_slwpcb (target));
39250 return target;
39252 case IX86_BUILTIN_BEXTRI32:
39253 case IX86_BUILTIN_BEXTRI64:
39254 arg0 = CALL_EXPR_ARG (exp, 0);
39255 arg1 = CALL_EXPR_ARG (exp, 1);
39256 op0 = expand_normal (arg0);
39257 op1 = expand_normal (arg1);
39258 icode = (fcode == IX86_BUILTIN_BEXTRI32
39259 ? CODE_FOR_tbm_bextri_si
39260 : CODE_FOR_tbm_bextri_di);
39261 if (!CONST_INT_P (op1))
39263 error ("last argument must be an immediate");
39264 return const0_rtx;
39266 else
39268 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39269 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39270 op1 = GEN_INT (length);
39271 op2 = GEN_INT (lsb_index);
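/* Example (added): a control word of 0x0810 yields lsb_index 0x10 and
   length 0x08, i.e. the extraction covers bits 16..23 of the source
   operand.  */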
39272 pat = GEN_FCN (icode) (target, op0, op1, op2);
39273 if (pat)
39274 emit_insn (pat);
39275 return target;
39278 case IX86_BUILTIN_RDRAND16_STEP:
39279 icode = CODE_FOR_rdrandhi_1;
39280 mode0 = HImode;
39281 goto rdrand_step;
39283 case IX86_BUILTIN_RDRAND32_STEP:
39284 icode = CODE_FOR_rdrandsi_1;
39285 mode0 = SImode;
39286 goto rdrand_step;
39288 case IX86_BUILTIN_RDRAND64_STEP:
39289 icode = CODE_FOR_rdranddi_1;
39290 mode0 = DImode;
39292 rdrand_step:
39293 op0 = gen_reg_rtx (mode0);
39294 emit_insn (GEN_FCN (icode) (op0));
39296 arg0 = CALL_EXPR_ARG (exp, 0);
39297 op1 = expand_normal (arg0);
39298 if (!address_operand (op1, VOIDmode))
39300 op1 = convert_memory_address (Pmode, op1);
39301 op1 = copy_addr_to_reg (op1);
39303 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39305 op1 = gen_reg_rtx (SImode);
39306 emit_move_insn (op1, CONST1_RTX (SImode));
39308 /* Emit SImode conditional move. */
39309 if (mode0 == HImode)
39311 op2 = gen_reg_rtx (SImode);
39312 emit_insn (gen_zero_extendhisi2 (op2, op0));
39314 else if (mode0 == SImode)
39315 op2 = op0;
39316 else
39317 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39319 if (target == 0
39320 || !register_operand (target, SImode))
39321 target = gen_reg_rtx (SImode);
39323 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39324 const0_rtx);
39325 emit_insn (gen_rtx_SET (VOIDmode, target,
39326 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39327 return target;
39329 case IX86_BUILTIN_RDSEED16_STEP:
39330 icode = CODE_FOR_rdseedhi_1;
39331 mode0 = HImode;
39332 goto rdseed_step;
39334 case IX86_BUILTIN_RDSEED32_STEP:
39335 icode = CODE_FOR_rdseedsi_1;
39336 mode0 = SImode;
39337 goto rdseed_step;
39339 case IX86_BUILTIN_RDSEED64_STEP:
39340 icode = CODE_FOR_rdseeddi_1;
39341 mode0 = DImode;
39343 rdseed_step:
39344 op0 = gen_reg_rtx (mode0);
39345 emit_insn (GEN_FCN (icode) (op0));
39347 arg0 = CALL_EXPR_ARG (exp, 0);
39348 op1 = expand_normal (arg0);
39349 if (!address_operand (op1, VOIDmode))
39351 op1 = convert_memory_address (Pmode, op1);
39352 op1 = copy_addr_to_reg (op1);
39354 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39356 op2 = gen_reg_rtx (QImode);
39358 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39359 const0_rtx);
39360 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
39362 if (target == 0
39363 || !register_operand (target, SImode))
39364 target = gen_reg_rtx (SImode);
39366 emit_insn (gen_zero_extendqisi2 (target, op2));
39367 return target;
39369 case IX86_BUILTIN_SBB32:
39370 icode = CODE_FOR_subsi3_carry;
39371 mode0 = SImode;
39372 goto addcarryx;
39374 case IX86_BUILTIN_SBB64:
39375 icode = CODE_FOR_subdi3_carry;
39376 mode0 = DImode;
39377 goto addcarryx;
39379 case IX86_BUILTIN_ADDCARRYX32:
39380 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39381 mode0 = SImode;
39382 goto addcarryx;
39384 case IX86_BUILTIN_ADDCARRYX64:
39385 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39386 mode0 = DImode;
39388 addcarryx:
39389 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39390 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39391 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39392 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39394 op0 = gen_reg_rtx (QImode);
39396 /* Generate CF from input operand. */
39397 op1 = expand_normal (arg0);
39398 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39399 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
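/* Note (added): adding 0xff to the QImode carry-in sets the carry flag
   exactly when the carry-in is nonzero, turning the boolean c_in into CF
   for the add/sub-with-carry pattern emitted below.  */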
39401 /* Gen ADCX instruction to compute X+Y+CF. */
39402 op2 = expand_normal (arg1);
39403 op3 = expand_normal (arg2);
39405 if (!REG_P (op2))
39406 op2 = copy_to_mode_reg (mode0, op2);
39407 if (!REG_P (op3))
39408 op3 = copy_to_mode_reg (mode0, op3);
39410 op0 = gen_reg_rtx (mode0);
39412 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39413 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39414 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39416 /* Store the result. */
39417 op4 = expand_normal (arg3);
39418 if (!address_operand (op4, VOIDmode))
39420 op4 = convert_memory_address (Pmode, op4);
39421 op4 = copy_addr_to_reg (op4);
39423 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39425 /* Return current CF value. */
39426 if (target == 0)
39427 target = gen_reg_rtx (QImode);
39429 PUT_MODE (pat, QImode);
39430 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
39431 return target;
39433 case IX86_BUILTIN_READ_FLAGS:
39434 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39436 if (optimize
39437 || target == NULL_RTX
39438 || !nonimmediate_operand (target, word_mode)
39439 || GET_MODE (target) != word_mode)
39440 target = gen_reg_rtx (word_mode);
39442 emit_insn (gen_pop (target));
39443 return target;
39445 case IX86_BUILTIN_WRITE_FLAGS:
39447 arg0 = CALL_EXPR_ARG (exp, 0);
39448 op0 = expand_normal (arg0);
39449 if (!general_no_elim_operand (op0, word_mode))
39450 op0 = copy_to_mode_reg (word_mode, op0);
39452 emit_insn (gen_push (op0));
39453 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39454 return 0;
39456 case IX86_BUILTIN_KORTESTC16:
39457 icode = CODE_FOR_kortestchi;
39458 mode0 = HImode;
39459 mode1 = CCCmode;
39460 goto kortest;
39462 case IX86_BUILTIN_KORTESTZ16:
39463 icode = CODE_FOR_kortestzhi;
39464 mode0 = HImode;
39465 mode1 = CCZmode;
39467 kortest:
39468 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39469 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39470 op0 = expand_normal (arg0);
39471 op1 = expand_normal (arg1);
39473 op0 = copy_to_reg (op0);
39474 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39475 op1 = copy_to_reg (op1);
39476 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39478 target = gen_reg_rtx (QImode);
39479 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
39481 /* Emit kortest. */
39482 emit_insn (GEN_FCN (icode) (op0, op1));
39483 /* And use setcc to return result from flags. */
39484 ix86_expand_setcc (target, EQ,
39485 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39486 return target;
39488 case IX86_BUILTIN_GATHERSIV2DF:
39489 icode = CODE_FOR_avx2_gathersiv2df;
39490 goto gather_gen;
39491 case IX86_BUILTIN_GATHERSIV4DF:
39492 icode = CODE_FOR_avx2_gathersiv4df;
39493 goto gather_gen;
39494 case IX86_BUILTIN_GATHERDIV2DF:
39495 icode = CODE_FOR_avx2_gatherdiv2df;
39496 goto gather_gen;
39497 case IX86_BUILTIN_GATHERDIV4DF:
39498 icode = CODE_FOR_avx2_gatherdiv4df;
39499 goto gather_gen;
39500 case IX86_BUILTIN_GATHERSIV4SF:
39501 icode = CODE_FOR_avx2_gathersiv4sf;
39502 goto gather_gen;
39503 case IX86_BUILTIN_GATHERSIV8SF:
39504 icode = CODE_FOR_avx2_gathersiv8sf;
39505 goto gather_gen;
39506 case IX86_BUILTIN_GATHERDIV4SF:
39507 icode = CODE_FOR_avx2_gatherdiv4sf;
39508 goto gather_gen;
39509 case IX86_BUILTIN_GATHERDIV8SF:
39510 icode = CODE_FOR_avx2_gatherdiv8sf;
39511 goto gather_gen;
39512 case IX86_BUILTIN_GATHERSIV2DI:
39513 icode = CODE_FOR_avx2_gathersiv2di;
39514 goto gather_gen;
39515 case IX86_BUILTIN_GATHERSIV4DI:
39516 icode = CODE_FOR_avx2_gathersiv4di;
39517 goto gather_gen;
39518 case IX86_BUILTIN_GATHERDIV2DI:
39519 icode = CODE_FOR_avx2_gatherdiv2di;
39520 goto gather_gen;
39521 case IX86_BUILTIN_GATHERDIV4DI:
39522 icode = CODE_FOR_avx2_gatherdiv4di;
39523 goto gather_gen;
39524 case IX86_BUILTIN_GATHERSIV4SI:
39525 icode = CODE_FOR_avx2_gathersiv4si;
39526 goto gather_gen;
39527 case IX86_BUILTIN_GATHERSIV8SI:
39528 icode = CODE_FOR_avx2_gathersiv8si;
39529 goto gather_gen;
39530 case IX86_BUILTIN_GATHERDIV4SI:
39531 icode = CODE_FOR_avx2_gatherdiv4si;
39532 goto gather_gen;
39533 case IX86_BUILTIN_GATHERDIV8SI:
39534 icode = CODE_FOR_avx2_gatherdiv8si;
39535 goto gather_gen;
39536 case IX86_BUILTIN_GATHERALTSIV4DF:
39537 icode = CODE_FOR_avx2_gathersiv4df;
39538 goto gather_gen;
39539 case IX86_BUILTIN_GATHERALTDIV8SF:
39540 icode = CODE_FOR_avx2_gatherdiv8sf;
39541 goto gather_gen;
39542 case IX86_BUILTIN_GATHERALTSIV4DI:
39543 icode = CODE_FOR_avx2_gathersiv4di;
39544 goto gather_gen;
39545 case IX86_BUILTIN_GATHERALTDIV8SI:
39546 icode = CODE_FOR_avx2_gatherdiv8si;
39547 goto gather_gen;
39548 case IX86_BUILTIN_GATHER3SIV16SF:
39549 icode = CODE_FOR_avx512f_gathersiv16sf;
39550 goto gather_gen;
39551 case IX86_BUILTIN_GATHER3SIV8DF:
39552 icode = CODE_FOR_avx512f_gathersiv8df;
39553 goto gather_gen;
39554 case IX86_BUILTIN_GATHER3DIV16SF:
39555 icode = CODE_FOR_avx512f_gatherdiv16sf;
39556 goto gather_gen;
39557 case IX86_BUILTIN_GATHER3DIV8DF:
39558 icode = CODE_FOR_avx512f_gatherdiv8df;
39559 goto gather_gen;
39560 case IX86_BUILTIN_GATHER3SIV16SI:
39561 icode = CODE_FOR_avx512f_gathersiv16si;
39562 goto gather_gen;
39563 case IX86_BUILTIN_GATHER3SIV8DI:
39564 icode = CODE_FOR_avx512f_gathersiv8di;
39565 goto gather_gen;
39566 case IX86_BUILTIN_GATHER3DIV16SI:
39567 icode = CODE_FOR_avx512f_gatherdiv16si;
39568 goto gather_gen;
39569 case IX86_BUILTIN_GATHER3DIV8DI:
39570 icode = CODE_FOR_avx512f_gatherdiv8di;
39571 goto gather_gen;
39572 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39573 icode = CODE_FOR_avx512f_gathersiv8df;
39574 goto gather_gen;
39575 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39576 icode = CODE_FOR_avx512f_gatherdiv16sf;
39577 goto gather_gen;
39578 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39579 icode = CODE_FOR_avx512f_gathersiv8di;
39580 goto gather_gen;
39581 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39582 icode = CODE_FOR_avx512f_gatherdiv16si;
39583 goto gather_gen;
39584 case IX86_BUILTIN_GATHER3SIV2DF:
39585 icode = CODE_FOR_avx512vl_gathersiv2df;
39586 goto gather_gen;
39587 case IX86_BUILTIN_GATHER3SIV4DF:
39588 icode = CODE_FOR_avx512vl_gathersiv4df;
39589 goto gather_gen;
39590 case IX86_BUILTIN_GATHER3DIV2DF:
39591 icode = CODE_FOR_avx512vl_gatherdiv2df;
39592 goto gather_gen;
39593 case IX86_BUILTIN_GATHER3DIV4DF:
39594 icode = CODE_FOR_avx512vl_gatherdiv4df;
39595 goto gather_gen;
39596 case IX86_BUILTIN_GATHER3SIV4SF:
39597 icode = CODE_FOR_avx512vl_gathersiv4sf;
39598 goto gather_gen;
39599 case IX86_BUILTIN_GATHER3SIV8SF:
39600 icode = CODE_FOR_avx512vl_gathersiv8sf;
39601 goto gather_gen;
39602 case IX86_BUILTIN_GATHER3DIV4SF:
39603 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39604 goto gather_gen;
39605 case IX86_BUILTIN_GATHER3DIV8SF:
39606 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39607 goto gather_gen;
39608 case IX86_BUILTIN_GATHER3SIV2DI:
39609 icode = CODE_FOR_avx512vl_gathersiv2di;
39610 goto gather_gen;
39611 case IX86_BUILTIN_GATHER3SIV4DI:
39612 icode = CODE_FOR_avx512vl_gathersiv4di;
39613 goto gather_gen;
39614 case IX86_BUILTIN_GATHER3DIV2DI:
39615 icode = CODE_FOR_avx512vl_gatherdiv2di;
39616 goto gather_gen;
39617 case IX86_BUILTIN_GATHER3DIV4DI:
39618 icode = CODE_FOR_avx512vl_gatherdiv4di;
39619 goto gather_gen;
39620 case IX86_BUILTIN_GATHER3SIV4SI:
39621 icode = CODE_FOR_avx512vl_gathersiv4si;
39622 goto gather_gen;
39623 case IX86_BUILTIN_GATHER3SIV8SI:
39624 icode = CODE_FOR_avx512vl_gathersiv8si;
39625 goto gather_gen;
39626 case IX86_BUILTIN_GATHER3DIV4SI:
39627 icode = CODE_FOR_avx512vl_gatherdiv4si;
39628 goto gather_gen;
39629 case IX86_BUILTIN_GATHER3DIV8SI:
39630 icode = CODE_FOR_avx512vl_gatherdiv8si;
39631 goto gather_gen;
39632 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39633 icode = CODE_FOR_avx512vl_gathersiv4df;
39634 goto gather_gen;
39635 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39636 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39637 goto gather_gen;
39638 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39639 icode = CODE_FOR_avx512vl_gathersiv4di;
39640 goto gather_gen;
39641 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39642 icode = CODE_FOR_avx512vl_gatherdiv8si;
39643 goto gather_gen;
39644 case IX86_BUILTIN_SCATTERSIV16SF:
39645 icode = CODE_FOR_avx512f_scattersiv16sf;
39646 goto scatter_gen;
39647 case IX86_BUILTIN_SCATTERSIV8DF:
39648 icode = CODE_FOR_avx512f_scattersiv8df;
39649 goto scatter_gen;
39650 case IX86_BUILTIN_SCATTERDIV16SF:
39651 icode = CODE_FOR_avx512f_scatterdiv16sf;
39652 goto scatter_gen;
39653 case IX86_BUILTIN_SCATTERDIV8DF:
39654 icode = CODE_FOR_avx512f_scatterdiv8df;
39655 goto scatter_gen;
39656 case IX86_BUILTIN_SCATTERSIV16SI:
39657 icode = CODE_FOR_avx512f_scattersiv16si;
39658 goto scatter_gen;
39659 case IX86_BUILTIN_SCATTERSIV8DI:
39660 icode = CODE_FOR_avx512f_scattersiv8di;
39661 goto scatter_gen;
39662 case IX86_BUILTIN_SCATTERDIV16SI:
39663 icode = CODE_FOR_avx512f_scatterdiv16si;
39664 goto scatter_gen;
39665 case IX86_BUILTIN_SCATTERDIV8DI:
39666 icode = CODE_FOR_avx512f_scatterdiv8di;
39667 goto scatter_gen;
39668 case IX86_BUILTIN_SCATTERSIV8SF:
39669 icode = CODE_FOR_avx512vl_scattersiv8sf;
39670 goto scatter_gen;
39671 case IX86_BUILTIN_SCATTERSIV4SF:
39672 icode = CODE_FOR_avx512vl_scattersiv4sf;
39673 goto scatter_gen;
39674 case IX86_BUILTIN_SCATTERSIV4DF:
39675 icode = CODE_FOR_avx512vl_scattersiv4df;
39676 goto scatter_gen;
39677 case IX86_BUILTIN_SCATTERSIV2DF:
39678 icode = CODE_FOR_avx512vl_scattersiv2df;
39679 goto scatter_gen;
39680 case IX86_BUILTIN_SCATTERDIV8SF:
39681 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39682 goto scatter_gen;
39683 case IX86_BUILTIN_SCATTERDIV4SF:
39684 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39685 goto scatter_gen;
39686 case IX86_BUILTIN_SCATTERDIV4DF:
39687 icode = CODE_FOR_avx512vl_scatterdiv4df;
39688 goto scatter_gen;
39689 case IX86_BUILTIN_SCATTERDIV2DF:
39690 icode = CODE_FOR_avx512vl_scatterdiv2df;
39691 goto scatter_gen;
39692 case IX86_BUILTIN_SCATTERSIV8SI:
39693 icode = CODE_FOR_avx512vl_scattersiv8si;
39694 goto scatter_gen;
39695 case IX86_BUILTIN_SCATTERSIV4SI:
39696 icode = CODE_FOR_avx512vl_scattersiv4si;
39697 goto scatter_gen;
39698 case IX86_BUILTIN_SCATTERSIV4DI:
39699 icode = CODE_FOR_avx512vl_scattersiv4di;
39700 goto scatter_gen;
39701 case IX86_BUILTIN_SCATTERSIV2DI:
39702 icode = CODE_FOR_avx512vl_scattersiv2di;
39703 goto scatter_gen;
39704 case IX86_BUILTIN_SCATTERDIV8SI:
39705 icode = CODE_FOR_avx512vl_scatterdiv8si;
39706 goto scatter_gen;
39707 case IX86_BUILTIN_SCATTERDIV4SI:
39708 icode = CODE_FOR_avx512vl_scatterdiv4si;
39709 goto scatter_gen;
39710 case IX86_BUILTIN_SCATTERDIV4DI:
39711 icode = CODE_FOR_avx512vl_scatterdiv4di;
39712 goto scatter_gen;
39713 case IX86_BUILTIN_SCATTERDIV2DI:
39714 icode = CODE_FOR_avx512vl_scatterdiv2di;
39715 goto scatter_gen;
39716 case IX86_BUILTIN_GATHERPFDPD:
39717 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39718 goto vec_prefetch_gen;
39719 case IX86_BUILTIN_GATHERPFDPS:
39720 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39721 goto vec_prefetch_gen;
39722 case IX86_BUILTIN_GATHERPFQPD:
39723 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39724 goto vec_prefetch_gen;
39725 case IX86_BUILTIN_GATHERPFQPS:
39726 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39727 goto vec_prefetch_gen;
39728 case IX86_BUILTIN_SCATTERPFDPD:
39729 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39730 goto vec_prefetch_gen;
39731 case IX86_BUILTIN_SCATTERPFDPS:
39732 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39733 goto vec_prefetch_gen;
39734 case IX86_BUILTIN_SCATTERPFQPD:
39735 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39736 goto vec_prefetch_gen;
39737 case IX86_BUILTIN_SCATTERPFQPS:
39738 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39739 goto vec_prefetch_gen;
39741 gather_gen:
39742 rtx half;
39743 rtx (*gen) (rtx, rtx);
39745 arg0 = CALL_EXPR_ARG (exp, 0);
39746 arg1 = CALL_EXPR_ARG (exp, 1);
39747 arg2 = CALL_EXPR_ARG (exp, 2);
39748 arg3 = CALL_EXPR_ARG (exp, 3);
39749 arg4 = CALL_EXPR_ARG (exp, 4);
39750 op0 = expand_normal (arg0);
39751 op1 = expand_normal (arg1);
39752 op2 = expand_normal (arg2);
39753 op3 = expand_normal (arg3);
39754 op4 = expand_normal (arg4);
39755 /* Note the arg order is different from the operand order. */
39756 mode0 = insn_data[icode].operand[1].mode;
39757 mode2 = insn_data[icode].operand[3].mode;
39758 mode3 = insn_data[icode].operand[4].mode;
39759 mode4 = insn_data[icode].operand[5].mode;
39761 if (target == NULL_RTX
39762 || GET_MODE (target) != insn_data[icode].operand[0].mode
39763 || !insn_data[icode].operand[0].predicate (target,
39764 GET_MODE (target)))
39765 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39766 else
39767 subtarget = target;
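/* Note (added): the *ALT* gather variants mix vector widths, so the cases
   below extract the low half of whichever operand is twice as wide as the
   pattern expects: the SImode index (op2) for the SIV forms, the source
   (op0) and mask (op3) for the DIV forms.  */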
39769 switch (fcode)
39771 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39772 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39773 half = gen_reg_rtx (V8SImode);
39774 if (!nonimmediate_operand (op2, V16SImode))
39775 op2 = copy_to_mode_reg (V16SImode, op2);
39776 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39777 op2 = half;
39778 break;
39779 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39780 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39781 case IX86_BUILTIN_GATHERALTSIV4DF:
39782 case IX86_BUILTIN_GATHERALTSIV4DI:
39783 half = gen_reg_rtx (V4SImode);
39784 if (!nonimmediate_operand (op2, V8SImode))
39785 op2 = copy_to_mode_reg (V8SImode, op2);
39786 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39787 op2 = half;
39788 break;
39789 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39790 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39791 half = gen_reg_rtx (mode0);
39792 if (mode0 == V8SFmode)
39793 gen = gen_vec_extract_lo_v16sf;
39794 else
39795 gen = gen_vec_extract_lo_v16si;
39796 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39797 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39798 emit_insn (gen (half, op0));
39799 op0 = half;
39800 if (GET_MODE (op3) != VOIDmode)
39802 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39803 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39804 emit_insn (gen (half, op3));
39805 op3 = half;
39807 break;
39808 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39809 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39810 case IX86_BUILTIN_GATHERALTDIV8SF:
39811 case IX86_BUILTIN_GATHERALTDIV8SI:
39812 half = gen_reg_rtx (mode0);
39813 if (mode0 == V4SFmode)
39814 gen = gen_vec_extract_lo_v8sf;
39815 else
39816 gen = gen_vec_extract_lo_v8si;
39817 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39818 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39819 emit_insn (gen (half, op0));
39820 op0 = half;
39821 if (GET_MODE (op3) != VOIDmode)
39823 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39824 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39825 emit_insn (gen (half, op3));
39826 op3 = half;
39828 break;
39829 default:
39830 break;
39833 /* Force the memory operand to use only a base register here; we
39834 don't want to do that for the memory operands of other builtin
39835 functions. */
39836 op1 = ix86_zero_extend_to_Pmode (op1);
39838 if (!insn_data[icode].operand[1].predicate (op0, mode0))
39839 op0 = copy_to_mode_reg (mode0, op0);
39840 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
39841 op1 = copy_to_mode_reg (Pmode, op1);
39842 if (!insn_data[icode].operand[3].predicate (op2, mode2))
39843 op2 = copy_to_mode_reg (mode2, op2);
39844 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
39846 if (!insn_data[icode].operand[4].predicate (op3, mode3))
39847 op3 = copy_to_mode_reg (mode3, op3);
39849 else
39851 op3 = copy_to_reg (op3);
39852 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
39854 if (!insn_data[icode].operand[5].predicate (op4, mode4))
39856 error ("the last argument must be scale 1, 2, 4, 8");
39857 return const0_rtx;
39860 /* Optimize. If mask is known to have all high bits set,
39861 replace op0 with pc_rtx to signal that the instruction
39862 overwrites the whole destination and doesn't use its
39863 previous contents. */
39864 if (optimize)
39866 if (TREE_CODE (arg3) == INTEGER_CST)
39868 if (integer_all_onesp (arg3))
39869 op0 = pc_rtx;
39871 else if (TREE_CODE (arg3) == VECTOR_CST)
39873 unsigned int negative = 0;
39874 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
39876 tree cst = VECTOR_CST_ELT (arg3, i);
39877 if (TREE_CODE (cst) == INTEGER_CST
39878 && tree_int_cst_sign_bit (cst))
39879 negative++;
39880 else if (TREE_CODE (cst) == REAL_CST
39881 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
39882 negative++;
39884 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
39885 op0 = pc_rtx;
39887 else if (TREE_CODE (arg3) == SSA_NAME
39888 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
39890 /* Recognize also when mask is like:
39891 __v2df src = _mm_setzero_pd ();
39892 __v2df mask = _mm_cmpeq_pd (src, src);
39894 __v8sf src = _mm256_setzero_ps ();
39895 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
39896 as that is a cheaper way to load all ones into
39897 a register than having to load a constant from
39898 memory. */
39899 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
39900 if (is_gimple_call (def_stmt))
39902 tree fndecl = gimple_call_fndecl (def_stmt);
39903 if (fndecl
39904 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
39905 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
39907 case IX86_BUILTIN_CMPPD:
39908 case IX86_BUILTIN_CMPPS:
39909 case IX86_BUILTIN_CMPPD256:
39910 case IX86_BUILTIN_CMPPS256:
39911 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
39912 break;
39913 /* FALLTHRU */
39914 case IX86_BUILTIN_CMPEQPD:
39915 case IX86_BUILTIN_CMPEQPS:
39916 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
39917 && initializer_zerop (gimple_call_arg (def_stmt,
39918 1)))
39919 op0 = pc_rtx;
39920 break;
39921 default:
39922 break;
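/* Illustrative note (added): the SSA pattern recognized above corresponds
   roughly to user code such as

     __m256d src  = _mm256_setzero_pd ();
     __m256d mask = _mm256_cmp_pd (src, src, _CMP_EQ_OQ);
     __m256d res  = _mm256_mask_i32gather_pd (src, base, idx, mask, 8);

   (base and idx are placeholders), where comparing zero with itself yields
   an all-ones mask, so op0 can become pc_rtx and the gather is known to
   overwrite the whole destination.  */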
39928 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
39929 if (! pat)
39930 return const0_rtx;
39931 emit_insn (pat);
39933 switch (fcode)
39935 case IX86_BUILTIN_GATHER3DIV16SF:
39936 if (target == NULL_RTX)
39937 target = gen_reg_rtx (V8SFmode);
39938 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
39939 break;
39940 case IX86_BUILTIN_GATHER3DIV16SI:
39941 if (target == NULL_RTX)
39942 target = gen_reg_rtx (V8SImode);
39943 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
39944 break;
39945 case IX86_BUILTIN_GATHER3DIV8SF:
39946 case IX86_BUILTIN_GATHERDIV8SF:
39947 if (target == NULL_RTX)
39948 target = gen_reg_rtx (V4SFmode);
39949 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
39950 break;
39951 case IX86_BUILTIN_GATHER3DIV8SI:
39952 case IX86_BUILTIN_GATHERDIV8SI:
39953 if (target == NULL_RTX)
39954 target = gen_reg_rtx (V4SImode);
39955 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
39956 break;
39957 default:
39958 target = subtarget;
39959 break;
39961 return target;
39963 scatter_gen:
39964 arg0 = CALL_EXPR_ARG (exp, 0);
39965 arg1 = CALL_EXPR_ARG (exp, 1);
39966 arg2 = CALL_EXPR_ARG (exp, 2);
39967 arg3 = CALL_EXPR_ARG (exp, 3);
39968 arg4 = CALL_EXPR_ARG (exp, 4);
39969 op0 = expand_normal (arg0);
39970 op1 = expand_normal (arg1);
39971 op2 = expand_normal (arg2);
39972 op3 = expand_normal (arg3);
39973 op4 = expand_normal (arg4);
39974 mode1 = insn_data[icode].operand[1].mode;
39975 mode2 = insn_data[icode].operand[2].mode;
39976 mode3 = insn_data[icode].operand[3].mode;
39977 mode4 = insn_data[icode].operand[4].mode;
39979 /* Force the memory operand to use only a base register here; we
39980 don't want to do that for the memory operands of other builtin
39981 functions. */
39982 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
39984 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39985 op0 = copy_to_mode_reg (Pmode, op0);
39987 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
39989 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39990 op1 = copy_to_mode_reg (mode1, op1);
39992 else
39994 op1 = copy_to_reg (op1);
39995 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
39998 if (!insn_data[icode].operand[2].predicate (op2, mode2))
39999 op2 = copy_to_mode_reg (mode2, op2);
40001 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40002 op3 = copy_to_mode_reg (mode3, op3);
40004 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40006 error ("the last argument must be scale 1, 2, 4, 8");
40007 return const0_rtx;
40010 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40011 if (! pat)
40012 return const0_rtx;
40014 emit_insn (pat);
40015 return 0;
40017 vec_prefetch_gen:
40018 arg0 = CALL_EXPR_ARG (exp, 0);
40019 arg1 = CALL_EXPR_ARG (exp, 1);
40020 arg2 = CALL_EXPR_ARG (exp, 2);
40021 arg3 = CALL_EXPR_ARG (exp, 3);
40022 arg4 = CALL_EXPR_ARG (exp, 4);
40023 op0 = expand_normal (arg0);
40024 op1 = expand_normal (arg1);
40025 op2 = expand_normal (arg2);
40026 op3 = expand_normal (arg3);
40027 op4 = expand_normal (arg4);
40028 mode0 = insn_data[icode].operand[0].mode;
40029 mode1 = insn_data[icode].operand[1].mode;
40030 mode3 = insn_data[icode].operand[3].mode;
40031 mode4 = insn_data[icode].operand[4].mode;
40033 if (GET_MODE (op0) == mode0
40034 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
40036 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40037 op0 = copy_to_mode_reg (mode0, op0);
40039 else if (op0 != constm1_rtx)
40041 op0 = copy_to_reg (op0);
40042 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
40045 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40046 op1 = copy_to_mode_reg (mode1, op1);
40048 /* Force the memory operand to use only a base register here; we
40049 don't want to do that for the memory operands of other builtin
40050 functions. */
40051 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
40053 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
40054 op2 = copy_to_mode_reg (Pmode, op2);
40056 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40058 error ("the forth argument must be scale 1, 2, 4, 8");
40059 return const0_rtx;
40062 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40064 error ("incorrect hint operand");
40065 return const0_rtx;
40068 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40069 if (! pat)
40070 return const0_rtx;
40072 emit_insn (pat);
40074 return 0;
40076 case IX86_BUILTIN_XABORT:
40077 icode = CODE_FOR_xabort;
40078 arg0 = CALL_EXPR_ARG (exp, 0);
40079 op0 = expand_normal (arg0);
40080 mode0 = insn_data[icode].operand[0].mode;
40081 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40083 error ("the xabort's argument must be an 8-bit immediate");
40084 return const0_rtx;
40086 emit_insn (gen_xabort (op0));
40087 return 0;
40089 default:
40090 break;
40093 for (i = 0, d = bdesc_special_args;
40094 i < ARRAY_SIZE (bdesc_special_args);
40095 i++, d++)
40096 if (d->code == fcode)
40097 return ix86_expand_special_args_builtin (d, exp, target);
40099 for (i = 0, d = bdesc_args;
40100 i < ARRAY_SIZE (bdesc_args);
40101 i++, d++)
40102 if (d->code == fcode)
40103 switch (fcode)
40105 case IX86_BUILTIN_FABSQ:
40106 case IX86_BUILTIN_COPYSIGNQ:
40107 if (!TARGET_SSE)
40108 /* Emit a normal call if SSE isn't available. */
40109 return expand_call (exp, target, ignore);
40110 default:
40111 return ix86_expand_args_builtin (d, exp, target);
40114 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
40115 if (d->code == fcode)
40116 return ix86_expand_sse_comi (d, exp, target);
40118 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
40119 if (d->code == fcode)
40120 return ix86_expand_round_builtin (d, exp, target);
40122 for (i = 0, d = bdesc_pcmpestr;
40123 i < ARRAY_SIZE (bdesc_pcmpestr);
40124 i++, d++)
40125 if (d->code == fcode)
40126 return ix86_expand_sse_pcmpestr (d, exp, target);
40128 for (i = 0, d = bdesc_pcmpistr;
40129 i < ARRAY_SIZE (bdesc_pcmpistr);
40130 i++, d++)
40131 if (d->code == fcode)
40132 return ix86_expand_sse_pcmpistr (d, exp, target);
40134 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
40135 if (d->code == fcode)
40136 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
40137 (enum ix86_builtin_func_type)
40138 d->flag, d->comparison);
40140 gcc_unreachable ();
40143 /* This returns the target-specific builtin with code CODE if
40144 current_function_decl has visibility on this builtin, which is checked
40145 using isa flags. Returns NULL_TREE otherwise. */
40147 static tree ix86_get_builtin (enum ix86_builtins code)
40149 struct cl_target_option *opts;
40150 tree target_tree = NULL_TREE;
40152 /* Determine the isa flags of current_function_decl. */
40154 if (current_function_decl)
40155 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
40157 if (target_tree == NULL)
40158 target_tree = target_option_default_node;
40160 opts = TREE_TARGET_OPTION (target_tree);
40162 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40163 return ix86_builtin_decl (code, true);
40164 else
40165 return NULL_TREE;
40168 /* Return the function decl for the target-specific builtin
40169 corresponding to the MPX builtin passed in FCODE. */
40170 static tree
40171 ix86_builtin_mpx_function (unsigned fcode)
40173 switch (fcode)
40175 case BUILT_IN_CHKP_BNDMK:
40176 return ix86_builtins[IX86_BUILTIN_BNDMK];
40178 case BUILT_IN_CHKP_BNDSTX:
40179 return ix86_builtins[IX86_BUILTIN_BNDSTX];
40181 case BUILT_IN_CHKP_BNDLDX:
40182 return ix86_builtins[IX86_BUILTIN_BNDLDX];
40184 case BUILT_IN_CHKP_BNDCL:
40185 return ix86_builtins[IX86_BUILTIN_BNDCL];
40187 case BUILT_IN_CHKP_BNDCU:
40188 return ix86_builtins[IX86_BUILTIN_BNDCU];
40190 case BUILT_IN_CHKP_BNDRET:
40191 return ix86_builtins[IX86_BUILTIN_BNDRET];
40193 case BUILT_IN_CHKP_INTERSECT:
40194 return ix86_builtins[IX86_BUILTIN_BNDINT];
40196 case BUILT_IN_CHKP_NARROW:
40197 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40199 case BUILT_IN_CHKP_SIZEOF:
40200 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40202 case BUILT_IN_CHKP_EXTRACT_LOWER:
40203 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40205 case BUILT_IN_CHKP_EXTRACT_UPPER:
40206 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40208 default:
40209 return NULL_TREE;
40212 gcc_unreachable ();
40215 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40217 Return an address to be used to load/store bounds for pointer
40218 passed in SLOT.
40220 SLOT_NO is an integer constant holding the number of a target
40221 dependent special slot to be used in case SLOT is not a memory.
40223 SPECIAL_BASE is a pointer to be used as the base of a fake address
40224 to access special slots in the Bounds Table. SPECIAL_BASE[-1],
40225 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
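/* For instance, with 64-bit pointers special slot 0 maps to the fake
   address SPECIAL_BASE - 8 and slot 1 to SPECIAL_BASE - 16, matching
   the computation below. */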
40227 static rtx
40228 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40230 rtx addr = NULL;
40232 /* A NULL slot means we pass bounds for a pointer not passed to the
40233 function at all. A register slot means we pass the pointer in a
40234 register. In both these cases bounds are passed via the Bounds
40235 Table. Since we do not have the actual pointer stored in memory,
40236 we have to use fake addresses to access the Bounds Table. We
40237 start with (special_base - sizeof (void *)) and decrease this
40238 address by the pointer size to get addresses for other slots. */
40239 if (!slot || REG_P (slot))
40241 gcc_assert (CONST_INT_P (slot_no));
40242 addr = plus_constant (Pmode, special_base,
40243 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40245 /* If pointer is passed in a memory then its address is used to
40246 access Bounds Table. */
40247 else if (MEM_P (slot))
40249 addr = XEXP (slot, 0);
40250 if (!register_operand (addr, Pmode))
40251 addr = copy_addr_to_reg (addr);
40253 else
40254 gcc_unreachable ();
40256 return addr;
40259 /* Expand pass uses this hook to load bounds for function parameter
40260 PTR passed in SLOT in case its bounds are not passed in a register.
40262 If SLOT is a memory, then bounds are loaded as for regular pointer
40263 loaded from memory. PTR may be NULL in case SLOT is a memory.
40264 In such case value of PTR (if required) may be loaded from SLOT.
40266 If SLOT is NULL or a register then SLOT_NO is an integer constant
40267 holding number of the target dependent special slot which should be
40268 used to obtain bounds.
40270 Return loaded bounds. */
40272 static rtx
40273 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40275 rtx reg = gen_reg_rtx (BNDmode);
40276 rtx addr;
40278 /* Get address to be used to access Bounds Table. Special slots start
40279 at the location of return address of the current function. */
40280 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40282 /* Load pointer value from a memory if we don't have it. */
40283 if (!ptr)
40285 gcc_assert (MEM_P (slot));
40286 ptr = copy_addr_to_reg (slot);
40289 emit_insn (BNDmode == BND64mode
40290 ? gen_bnd64_ldx (reg, addr, ptr)
40291 : gen_bnd32_ldx (reg, addr, ptr));
40293 return reg;
40296 /* Expand pass uses this hook to store BOUNDS for call argument PTR
40297 passed in SLOT in case BOUNDS are not passed in a register.
40299 If SLOT is a memory, then BOUNDS are stored as for regular pointer
40300 stored in memory. PTR may be NULL in case SLOT is a memory.
40301 In such case value of PTR (if required) may be loaded from SLOT.
40303 If SLOT is NULL or a register then SLOT_NO is an integer constant
40304 holding number of the target dependent special slot which should be
40305 used to store BOUNDS. */
40307 static void
40308 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40310 rtx addr;
40312 /* Get address to be used to access Bounds Table. Special slots start
40313 at the location of return address of a called function. */
40314 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40316 /* Load pointer value from a memory if we don't have it. */
40317 if (!ptr)
40319 gcc_assert (MEM_P (slot));
40320 ptr = copy_addr_to_reg (slot);
40323 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40324 if (!register_operand (bounds, BNDmode))
40325 bounds = copy_to_mode_reg (BNDmode, bounds);
40327 emit_insn (BNDmode == BND64mode
40328 ? gen_bnd64_stx (addr, ptr, bounds)
40329 : gen_bnd32_stx (addr, ptr, bounds));
40332 /* Load and return bounds returned by function in SLOT. */
40334 static rtx
40335 ix86_load_returned_bounds (rtx slot)
40337 rtx res;
40339 gcc_assert (REG_P (slot));
40340 res = gen_reg_rtx (BNDmode);
40341 emit_move_insn (res, slot);
40343 return res;
40346 /* Store BOUNDS returned by function into SLOT. */
40348 static void
40349 ix86_store_returned_bounds (rtx slot, rtx bounds)
40351 gcc_assert (REG_P (slot));
40352 emit_move_insn (slot, bounds);
40355 /* Returns a function decl for a vectorized version of the builtin function
40356 with builtin function code FN and the result vector type TYPE, or NULL_TREE
40357 if it is not available. */
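/* For example, BUILT_IN_SQRT with a two-element DFmode vector as both
   input and output maps to IX86_BUILTIN_SQRTPD below. */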
40359 static tree
40360 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40361 tree type_in)
40363 machine_mode in_mode, out_mode;
40364 int in_n, out_n;
40365 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40367 if (TREE_CODE (type_out) != VECTOR_TYPE
40368 || TREE_CODE (type_in) != VECTOR_TYPE
40369 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40370 return NULL_TREE;
40372 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40373 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40374 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40375 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40377 switch (fn)
40379 case BUILT_IN_SQRT:
40380 if (out_mode == DFmode && in_mode == DFmode)
40382 if (out_n == 2 && in_n == 2)
40383 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40384 else if (out_n == 4 && in_n == 4)
40385 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40386 else if (out_n == 8 && in_n == 8)
40387 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40389 break;
40391 case BUILT_IN_EXP2F:
40392 if (out_mode == SFmode && in_mode == SFmode)
40394 if (out_n == 16 && in_n == 16)
40395 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40397 break;
40399 case BUILT_IN_SQRTF:
40400 if (out_mode == SFmode && in_mode == SFmode)
40402 if (out_n == 4 && in_n == 4)
40403 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40404 else if (out_n == 8 && in_n == 8)
40405 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40406 else if (out_n == 16 && in_n == 16)
40407 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40409 break;
40411 case BUILT_IN_IFLOOR:
40412 case BUILT_IN_LFLOOR:
40413 case BUILT_IN_LLFLOOR:
40414 /* The round insn does not trap on denormals. */
40415 if (flag_trapping_math || !TARGET_ROUND)
40416 break;
40418 if (out_mode == SImode && in_mode == DFmode)
40420 if (out_n == 4 && in_n == 2)
40421 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40422 else if (out_n == 8 && in_n == 4)
40423 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40424 else if (out_n == 16 && in_n == 8)
40425 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40427 break;
40429 case BUILT_IN_IFLOORF:
40430 case BUILT_IN_LFLOORF:
40431 case BUILT_IN_LLFLOORF:
40432 /* The round insn does not trap on denormals. */
40433 if (flag_trapping_math || !TARGET_ROUND)
40434 break;
40436 if (out_mode == SImode && in_mode == SFmode)
40438 if (out_n == 4 && in_n == 4)
40439 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40440 else if (out_n == 8 && in_n == 8)
40441 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40443 break;
40445 case BUILT_IN_ICEIL:
40446 case BUILT_IN_LCEIL:
40447 case BUILT_IN_LLCEIL:
40448 /* The round insn does not trap on denormals. */
40449 if (flag_trapping_math || !TARGET_ROUND)
40450 break;
40452 if (out_mode == SImode && in_mode == DFmode)
40454 if (out_n == 4 && in_n == 2)
40455 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40456 else if (out_n == 8 && in_n == 4)
40457 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40458 else if (out_n == 16 && in_n == 8)
40459 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40461 break;
40463 case BUILT_IN_ICEILF:
40464 case BUILT_IN_LCEILF:
40465 case BUILT_IN_LLCEILF:
40466 /* The round insn does not trap on denormals. */
40467 if (flag_trapping_math || !TARGET_ROUND)
40468 break;
40470 if (out_mode == SImode && in_mode == SFmode)
40472 if (out_n == 4 && in_n == 4)
40473 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40474 else if (out_n == 8 && in_n == 8)
40475 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40477 break;
40479 case BUILT_IN_IRINT:
40480 case BUILT_IN_LRINT:
40481 case BUILT_IN_LLRINT:
40482 if (out_mode == SImode && in_mode == DFmode)
40484 if (out_n == 4 && in_n == 2)
40485 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40486 else if (out_n == 8 && in_n == 4)
40487 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40489 break;
40491 case BUILT_IN_IRINTF:
40492 case BUILT_IN_LRINTF:
40493 case BUILT_IN_LLRINTF:
40494 if (out_mode == SImode && in_mode == SFmode)
40496 if (out_n == 4 && in_n == 4)
40497 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40498 else if (out_n == 8 && in_n == 8)
40499 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40501 break;
40503 case BUILT_IN_IROUND:
40504 case BUILT_IN_LROUND:
40505 case BUILT_IN_LLROUND:
40506 /* The round insn does not trap on denormals. */
40507 if (flag_trapping_math || !TARGET_ROUND)
40508 break;
40510 if (out_mode == SImode && in_mode == DFmode)
40512 if (out_n == 4 && in_n == 2)
40513 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40514 else if (out_n == 8 && in_n == 4)
40515 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40516 else if (out_n == 16 && in_n == 8)
40517 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40519 break;
40521 case BUILT_IN_IROUNDF:
40522 case BUILT_IN_LROUNDF:
40523 case BUILT_IN_LLROUNDF:
40524 /* The round insn does not trap on denormals. */
40525 if (flag_trapping_math || !TARGET_ROUND)
40526 break;
40528 if (out_mode == SImode && in_mode == SFmode)
40530 if (out_n == 4 && in_n == 4)
40531 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40532 else if (out_n == 8 && in_n == 8)
40533 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40535 break;
40537 case BUILT_IN_COPYSIGN:
40538 if (out_mode == DFmode && in_mode == DFmode)
40540 if (out_n == 2 && in_n == 2)
40541 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40542 else if (out_n == 4 && in_n == 4)
40543 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40544 else if (out_n == 8 && in_n == 8)
40545 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40547 break;
40549 case BUILT_IN_COPYSIGNF:
40550 if (out_mode == SFmode && in_mode == SFmode)
40552 if (out_n == 4 && in_n == 4)
40553 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40554 else if (out_n == 8 && in_n == 8)
40555 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40556 else if (out_n == 16 && in_n == 16)
40557 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40559 break;
40561 case BUILT_IN_FLOOR:
40562 /* The round insn does not trap on denormals. */
40563 if (flag_trapping_math || !TARGET_ROUND)
40564 break;
40566 if (out_mode == DFmode && in_mode == DFmode)
40568 if (out_n == 2 && in_n == 2)
40569 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40570 else if (out_n == 4 && in_n == 4)
40571 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40573 break;
40575 case BUILT_IN_FLOORF:
40576 /* The round insn does not trap on denormals. */
40577 if (flag_trapping_math || !TARGET_ROUND)
40578 break;
40580 if (out_mode == SFmode && in_mode == SFmode)
40582 if (out_n == 4 && in_n == 4)
40583 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40584 else if (out_n == 8 && in_n == 8)
40585 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40587 break;
40589 case BUILT_IN_CEIL:
40590 /* The round insn does not trap on denormals. */
40591 if (flag_trapping_math || !TARGET_ROUND)
40592 break;
40594 if (out_mode == DFmode && in_mode == DFmode)
40596 if (out_n == 2 && in_n == 2)
40597 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40598 else if (out_n == 4 && in_n == 4)
40599 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40601 break;
40603 case BUILT_IN_CEILF:
40604 /* The round insn does not trap on denormals. */
40605 if (flag_trapping_math || !TARGET_ROUND)
40606 break;
40608 if (out_mode == SFmode && in_mode == SFmode)
40610 if (out_n == 4 && in_n == 4)
40611 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40612 else if (out_n == 8 && in_n == 8)
40613 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40615 break;
40617 case BUILT_IN_TRUNC:
40618 /* The round insn does not trap on denormals. */
40619 if (flag_trapping_math || !TARGET_ROUND)
40620 break;
40622 if (out_mode == DFmode && in_mode == DFmode)
40624 if (out_n == 2 && in_n == 2)
40625 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40626 else if (out_n == 4 && in_n == 4)
40627 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40629 break;
40631 case BUILT_IN_TRUNCF:
40632 /* The round insn does not trap on denormals. */
40633 if (flag_trapping_math || !TARGET_ROUND)
40634 break;
40636 if (out_mode == SFmode && in_mode == SFmode)
40638 if (out_n == 4 && in_n == 4)
40639 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40640 else if (out_n == 8 && in_n == 8)
40641 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40643 break;
40645 case BUILT_IN_RINT:
40646 /* The round insn does not trap on denormals. */
40647 if (flag_trapping_math || !TARGET_ROUND)
40648 break;
40650 if (out_mode == DFmode && in_mode == DFmode)
40652 if (out_n == 2 && in_n == 2)
40653 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40654 else if (out_n == 4 && in_n == 4)
40655 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40657 break;
40659 case BUILT_IN_RINTF:
40660 /* The round insn does not trap on denormals. */
40661 if (flag_trapping_math || !TARGET_ROUND)
40662 break;
40664 if (out_mode == SFmode && in_mode == SFmode)
40666 if (out_n == 4 && in_n == 4)
40667 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40668 else if (out_n == 8 && in_n == 8)
40669 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40671 break;
40673 case BUILT_IN_ROUND:
40674 /* The round insn does not trap on denormals. */
40675 if (flag_trapping_math || !TARGET_ROUND)
40676 break;
40678 if (out_mode == DFmode && in_mode == DFmode)
40680 if (out_n == 2 && in_n == 2)
40681 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40682 else if (out_n == 4 && in_n == 4)
40683 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40685 break;
40687 case BUILT_IN_ROUNDF:
40688 /* The round insn does not trap on denormals. */
40689 if (flag_trapping_math || !TARGET_ROUND)
40690 break;
40692 if (out_mode == SFmode && in_mode == SFmode)
40694 if (out_n == 4 && in_n == 4)
40695 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40696 else if (out_n == 8 && in_n == 8)
40697 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40699 break;
40701 case BUILT_IN_FMA:
40702 if (out_mode == DFmode && in_mode == DFmode)
40704 if (out_n == 2 && in_n == 2)
40705 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40706 if (out_n == 4 && in_n == 4)
40707 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40709 break;
40711 case BUILT_IN_FMAF:
40712 if (out_mode == SFmode && in_mode == SFmode)
40714 if (out_n == 4 && in_n == 4)
40715 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40716 if (out_n == 8 && in_n == 8)
40717 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40719 break;
40721 default:
40722 break;
40725 /* Dispatch to a handler for a vectorization library. */
40726 if (ix86_veclib_handler)
40727 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40728 type_in);
40730 return NULL_TREE;
40733 /* Handler for an SVML-style interface to
40734 a library with vectorized intrinsics. */
40736 static tree
40737 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40739 char name[20];
40740 tree fntype, new_fndecl, args;
40741 unsigned arity;
40742 const char *bname;
40743 machine_mode el_mode, in_mode;
40744 int n, in_n;
40746 /* The SVML is suitable for unsafe math only. */
40747 if (!flag_unsafe_math_optimizations)
40748 return NULL_TREE;
40750 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40751 n = TYPE_VECTOR_SUBPARTS (type_out);
40752 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40753 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40754 if (el_mode != in_mode
40755 || n != in_n)
40756 return NULL_TREE;
40758 switch (fn)
40760 case BUILT_IN_EXP:
40761 case BUILT_IN_LOG:
40762 case BUILT_IN_LOG10:
40763 case BUILT_IN_POW:
40764 case BUILT_IN_TANH:
40765 case BUILT_IN_TAN:
40766 case BUILT_IN_ATAN:
40767 case BUILT_IN_ATAN2:
40768 case BUILT_IN_ATANH:
40769 case BUILT_IN_CBRT:
40770 case BUILT_IN_SINH:
40771 case BUILT_IN_SIN:
40772 case BUILT_IN_ASINH:
40773 case BUILT_IN_ASIN:
40774 case BUILT_IN_COSH:
40775 case BUILT_IN_COS:
40776 case BUILT_IN_ACOSH:
40777 case BUILT_IN_ACOS:
40778 if (el_mode != DFmode || n != 2)
40779 return NULL_TREE;
40780 break;
40782 case BUILT_IN_EXPF:
40783 case BUILT_IN_LOGF:
40784 case BUILT_IN_LOG10F:
40785 case BUILT_IN_POWF:
40786 case BUILT_IN_TANHF:
40787 case BUILT_IN_TANF:
40788 case BUILT_IN_ATANF:
40789 case BUILT_IN_ATAN2F:
40790 case BUILT_IN_ATANHF:
40791 case BUILT_IN_CBRTF:
40792 case BUILT_IN_SINHF:
40793 case BUILT_IN_SINF:
40794 case BUILT_IN_ASINHF:
40795 case BUILT_IN_ASINF:
40796 case BUILT_IN_COSHF:
40797 case BUILT_IN_COSF:
40798 case BUILT_IN_ACOSHF:
40799 case BUILT_IN_ACOSF:
40800 if (el_mode != SFmode || n != 4)
40801 return NULL_TREE;
40802 break;
40804 default:
40805 return NULL_TREE;
40808 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40810 if (fn == BUILT_IN_LOGF)
40811 strcpy (name, "vmlsLn4");
40812 else if (fn == BUILT_IN_LOG)
40813 strcpy (name, "vmldLn2");
40814 else if (n == 4)
40816 sprintf (name, "vmls%s", bname+10);
40817 name[strlen (name)-1] = '4';
40819 else
40820 sprintf (name, "vmld%s2", bname+10);
40822 /* Convert to uppercase. */
40823 name[4] &= ~0x20;
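/* E.g. BUILT_IN_SINF with four-element vectors becomes "vmlsSin4",
   while BUILT_IN_SIN becomes "vmldSin2". */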
40825 arity = 0;
40826 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40827 args;
40828 args = TREE_CHAIN (args))
40829 arity++;
40831 if (arity == 1)
40832 fntype = build_function_type_list (type_out, type_in, NULL);
40833 else
40834 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40836 /* Build a function declaration for the vectorized function. */
40837 new_fndecl = build_decl (BUILTINS_LOCATION,
40838 FUNCTION_DECL, get_identifier (name), fntype);
40839 TREE_PUBLIC (new_fndecl) = 1;
40840 DECL_EXTERNAL (new_fndecl) = 1;
40841 DECL_IS_NOVOPS (new_fndecl) = 1;
40842 TREE_READONLY (new_fndecl) = 1;
40844 return new_fndecl;
40847 /* Handler for an ACML-style interface to
40848 a library with vectorized intrinsics. */
40850 static tree
40851 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
40853 char name[20] = "__vr.._";
40854 tree fntype, new_fndecl, args;
40855 unsigned arity;
40856 const char *bname;
40857 machine_mode el_mode, in_mode;
40858 int n, in_n;
40860 /* The ACML is 64-bit only and suitable for unsafe math only, as
40861 it does not correctly support parts of IEEE arithmetic with the required
40862 precision, such as denormals. */
40863 if (!TARGET_64BIT
40864 || !flag_unsafe_math_optimizations)
40865 return NULL_TREE;
40867 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40868 n = TYPE_VECTOR_SUBPARTS (type_out);
40869 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40870 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40871 if (el_mode != in_mode
40872 || n != in_n)
40873 return NULL_TREE;
40875 switch (fn)
40877 case BUILT_IN_SIN:
40878 case BUILT_IN_COS:
40879 case BUILT_IN_EXP:
40880 case BUILT_IN_LOG:
40881 case BUILT_IN_LOG2:
40882 case BUILT_IN_LOG10:
40883 name[4] = 'd';
40884 name[5] = '2';
40885 if (el_mode != DFmode
40886 || n != 2)
40887 return NULL_TREE;
40888 break;
40890 case BUILT_IN_SINF:
40891 case BUILT_IN_COSF:
40892 case BUILT_IN_EXPF:
40893 case BUILT_IN_POWF:
40894 case BUILT_IN_LOGF:
40895 case BUILT_IN_LOG2F:
40896 case BUILT_IN_LOG10F:
40897 name[4] = 's';
40898 name[5] = '4';
40899 if (el_mode != SFmode
40900 || n != 4)
40901 return NULL_TREE;
40902 break;
40904 default:
40905 return NULL_TREE;
40908 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40909 sprintf (name + 7, "%s", bname+10);
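/* E.g. BUILT_IN_SIN becomes "__vrd2_sin" and BUILT_IN_SINF becomes
   "__vrs4_sinf". */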
40911 arity = 0;
40912 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40913 args;
40914 args = TREE_CHAIN (args))
40915 arity++;
40917 if (arity == 1)
40918 fntype = build_function_type_list (type_out, type_in, NULL);
40919 else
40920 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40922 /* Build a function declaration for the vectorized function. */
40923 new_fndecl = build_decl (BUILTINS_LOCATION,
40924 FUNCTION_DECL, get_identifier (name), fntype);
40925 TREE_PUBLIC (new_fndecl) = 1;
40926 DECL_EXTERNAL (new_fndecl) = 1;
40927 DECL_IS_NOVOPS (new_fndecl) = 1;
40928 TREE_READONLY (new_fndecl) = 1;
40930 return new_fndecl;
40933 /* Returns a decl of a function that implements gather load with
40934 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
40935 Return NULL_TREE if it is not available. */
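/* For example, gathering a V4SF vector with SImode indices maps to
   IX86_BUILTIN_GATHERSIV4SF, or IX86_BUILTIN_GATHER3SIV4SF when
   AVX512VL is enabled. */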
40937 static tree
40938 ix86_vectorize_builtin_gather (const_tree mem_vectype,
40939 const_tree index_type, int scale)
40941 bool si;
40942 enum ix86_builtins code;
40944 if (! TARGET_AVX2)
40945 return NULL_TREE;
40947 if ((TREE_CODE (index_type) != INTEGER_TYPE
40948 && !POINTER_TYPE_P (index_type))
40949 || (TYPE_MODE (index_type) != SImode
40950 && TYPE_MODE (index_type) != DImode))
40951 return NULL_TREE;
40953 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
40954 return NULL_TREE;
40956 /* v*gather* insn sign extends index to pointer mode. */
40957 if (TYPE_PRECISION (index_type) < POINTER_SIZE
40958 && TYPE_UNSIGNED (index_type))
40959 return NULL_TREE;
40961 if (scale <= 0
40962 || scale > 8
40963 || (scale & (scale - 1)) != 0)
40964 return NULL_TREE;
40966 si = TYPE_MODE (index_type) == SImode;
40967 switch (TYPE_MODE (mem_vectype))
40969 case V2DFmode:
40970 if (TARGET_AVX512VL)
40971 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
40972 else
40973 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
40974 break;
40975 case V4DFmode:
40976 if (TARGET_AVX512VL)
40977 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
40978 else
40979 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
40980 break;
40981 case V2DImode:
40982 if (TARGET_AVX512VL)
40983 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
40984 else
40985 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
40986 break;
40987 case V4DImode:
40988 if (TARGET_AVX512VL)
40989 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
40990 else
40991 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
40992 break;
40993 case V4SFmode:
40994 if (TARGET_AVX512VL)
40995 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
40996 else
40997 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
40998 break;
40999 case V8SFmode:
41000 if (TARGET_AVX512VL)
41001 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
41002 else
41003 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
41004 break;
41005 case V4SImode:
41006 if (TARGET_AVX512VL)
41007 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
41008 else
41009 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
41010 break;
41011 case V8SImode:
41012 if (TARGET_AVX512VL)
41013 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
41014 else
41015 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
41016 break;
41017 case V8DFmode:
41018 if (TARGET_AVX512F)
41019 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
41020 else
41021 return NULL_TREE;
41022 break;
41023 case V8DImode:
41024 if (TARGET_AVX512F)
41025 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
41026 else
41027 return NULL_TREE;
41028 break;
41029 case V16SFmode:
41030 if (TARGET_AVX512F)
41031 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
41032 else
41033 return NULL_TREE;
41034 break;
41035 case V16SImode:
41036 if (TARGET_AVX512F)
41037 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
41038 else
41039 return NULL_TREE;
41040 break;
41041 default:
41042 return NULL_TREE;
41045 return ix86_get_builtin (code);
41048 /* Returns a code for a target-specific builtin that implements
41049 reciprocal of the function, or NULL_TREE if not available. */
41051 static tree
41052 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
41054 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
41055 && flag_finite_math_only && !flag_trapping_math
41056 && flag_unsafe_math_optimizations))
41057 return NULL_TREE;
41059 if (md_fn)
41060 /* Machine dependent builtins. */
41061 switch (fn)
41063 /* Vectorized version of sqrt to rsqrt conversion. */
41064 case IX86_BUILTIN_SQRTPS_NR:
41065 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
41067 case IX86_BUILTIN_SQRTPS_NR256:
41068 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
41070 default:
41071 return NULL_TREE;
41073 else
41074 /* Normal builtins. */
41075 switch (fn)
41077 /* Sqrt to rsqrt conversion. */
41078 case BUILT_IN_SQRTF:
41079 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
41081 default:
41082 return NULL_TREE;
41086 /* Helper for avx_vpermilps256_operand et al. This is also used by
41087 the expansion functions to turn the parallel back into a mask.
41088 The return value is 0 for no match and the imm8+1 for a match. */
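/* For example, in V4SFmode the identity selector (0 1 2 3) is encoded
   as imm8 0xe4, so 0xe5 is returned. */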
41091 avx_vpermilp_parallel (rtx par, machine_mode mode)
41093 unsigned i, nelt = GET_MODE_NUNITS (mode);
41094 unsigned mask = 0;
41095 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
41097 if (XVECLEN (par, 0) != (int) nelt)
41098 return 0;
41100 /* Validate that all of the elements are constants, and not totally
41101 out of range. Copy the data into an integral array to make the
41102 subsequent checks easier. */
41103 for (i = 0; i < nelt; ++i)
41105 rtx er = XVECEXP (par, 0, i);
41106 unsigned HOST_WIDE_INT ei;
41108 if (!CONST_INT_P (er))
41109 return 0;
41110 ei = INTVAL (er);
41111 if (ei >= nelt)
41112 return 0;
41113 ipar[i] = ei;
41116 switch (mode)
41118 case V8DFmode:
41119 /* In the 512-bit DFmode case, we can only move elements within
41120 a 128-bit lane. First fill the second part of the mask,
41121 then fallthru. */
41122 for (i = 4; i < 6; ++i)
41124 if (ipar[i] < 4 || ipar[i] >= 6)
41125 return 0;
41126 mask |= (ipar[i] - 4) << i;
41128 for (i = 6; i < 8; ++i)
41130 if (ipar[i] < 6)
41131 return 0;
41132 mask |= (ipar[i] - 6) << i;
41134 /* FALLTHRU */
41136 case V4DFmode:
41137 /* In the 256-bit DFmode case, we can only move elements within
41138 a 128-bit lane. */
41139 for (i = 0; i < 2; ++i)
41141 if (ipar[i] >= 2)
41142 return 0;
41143 mask |= ipar[i] << i;
41145 for (i = 2; i < 4; ++i)
41147 if (ipar[i] < 2)
41148 return 0;
41149 mask |= (ipar[i] - 2) << i;
41151 break;
41153 case V16SFmode:
41154 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
41155 must mirror the permutation in the lower 256 bits. */
41156 for (i = 0; i < 8; ++i)
41157 if (ipar[i] + 8 != ipar[i + 8])
41158 return 0;
41159 /* FALLTHRU */
41161 case V8SFmode:
41162 /* In 256 bit SFmode case, we have full freedom of
41163 movement within the low 128-bit lane, but the high 128-bit
41164 lane must mirror the exact same pattern. */
41165 for (i = 0; i < 4; ++i)
41166 if (ipar[i] + 4 != ipar[i + 4])
41167 return 0;
41168 nelt = 4;
41169 /* FALLTHRU */
41171 case V2DFmode:
41172 case V4SFmode:
41173 /* In the 128-bit case, we have full freedom in the placement of
41174 the elements from the source operand. */
41175 for (i = 0; i < nelt; ++i)
41176 mask |= ipar[i] << (i * (nelt / 2));
41177 break;
41179 default:
41180 gcc_unreachable ();
41183 /* Make sure success has a non-zero value by adding one. */
41184 return mask + 1;
41187 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
41188 the expansion functions to turn the parallel back into a mask.
41189 The return value is 0 for no match and the imm8+1 for a match. */
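/* For example, in V4DFmode the selector (0 1 4 5), i.e. the low lane of
   each operand, is encoded as imm8 0x20, so 0x21 is returned. */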
41192 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41194 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41195 unsigned mask = 0;
41196 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41198 if (XVECLEN (par, 0) != (int) nelt)
41199 return 0;
41201 /* Validate that all of the elements are constants, and not totally
41202 out of range. Copy the data into an integral array to make the
41203 subsequent checks easier. */
41204 for (i = 0; i < nelt; ++i)
41206 rtx er = XVECEXP (par, 0, i);
41207 unsigned HOST_WIDE_INT ei;
41209 if (!CONST_INT_P (er))
41210 return 0;
41211 ei = INTVAL (er);
41212 if (ei >= 2 * nelt)
41213 return 0;
41214 ipar[i] = ei;
41217 /* Validate that each half of the permute is a contiguous run of elements. */
41218 for (i = 0; i < nelt2 - 1; ++i)
41219 if (ipar[i] + 1 != ipar[i + 1])
41220 return 0;
41221 for (i = nelt2; i < nelt - 1; ++i)
41222 if (ipar[i] + 1 != ipar[i + 1])
41223 return 0;
41225 /* Reconstruct the mask. */
41226 for (i = 0; i < 2; ++i)
41228 unsigned e = ipar[i * nelt2];
41229 if (e % nelt2)
41230 return 0;
41231 e /= nelt2;
41232 mask |= e << (i * 4);
41235 /* Make sure success has a non-zero value by adding one. */
41236 return mask + 1;
41239 /* Return a register priority for hard reg REGNO. */
41240 static int
41241 ix86_register_priority (int hard_regno)
41243 /* ebp and r13 as the base always want a displacement, r12 as the
41244 base always wants an index. So discourage their use in an
41245 address. */
41246 if (hard_regno == R12_REG || hard_regno == R13_REG)
41247 return 0;
41248 if (hard_regno == BP_REG)
41249 return 1;
41250 /* New x86-64 int registers result in bigger code size. Discourage
41251 them. */
41252 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41253 return 2;
41254 /* New x86-64 SSE registers result in bigger code size. Discourage
41255 them. */
41256 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41257 return 2;
41258 /* Use of the AX register results in smaller code. Prefer it. */
41259 if (hard_regno == AX_REG)
41260 return 4;
41261 return 3;
41264 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41266 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41267 QImode must go into class Q_REGS.
41268 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
41269 movdf to do mem-to-mem moves through integer regs. */
41271 static reg_class_t
41272 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41274 machine_mode mode = GET_MODE (x);
41276 /* We're only allowed to return a subclass of CLASS. Many of the
41277 following checks fail for NO_REGS, so eliminate that early. */
41278 if (regclass == NO_REGS)
41279 return NO_REGS;
41281 /* All classes can load zeros. */
41282 if (x == CONST0_RTX (mode))
41283 return regclass;
41285 /* Force constants into memory if we are loading a (nonzero) constant into
41286 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41287 instructions to load from a constant. */
41288 if (CONSTANT_P (x)
41289 && (MAYBE_MMX_CLASS_P (regclass)
41290 || MAYBE_SSE_CLASS_P (regclass)
41291 || MAYBE_MASK_CLASS_P (regclass)))
41292 return NO_REGS;
41294 /* Prefer SSE regs only, if we can use them for math. */
41295 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41296 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41298 /* Floating-point constants need more complex checks. */
41299 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
41301 /* General regs can load everything. */
41302 if (reg_class_subset_p (regclass, GENERAL_REGS))
41303 return regclass;
41305 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41306 zero above. We only want to wind up preferring 80387 registers if
41307 we plan on doing computation with them. */
41308 if (TARGET_80387
41309 && standard_80387_constant_p (x) > 0)
41311 /* Limit class to non-sse. */
41312 if (regclass == FLOAT_SSE_REGS)
41313 return FLOAT_REGS;
41314 if (regclass == FP_TOP_SSE_REGS)
41315 return FP_TOP_REG;
41316 if (regclass == FP_SECOND_SSE_REGS)
41317 return FP_SECOND_REG;
41318 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41319 return regclass;
41322 return NO_REGS;
41325 /* Generally when we see PLUS here, it's the function invariant
41326 (plus soft-fp const_int). Which can only be computed into general
41327 regs. */
41328 if (GET_CODE (x) == PLUS)
41329 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41331 /* QImode constants are easy to load, but non-constant QImode data
41332 must go into Q_REGS. */
41333 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41335 if (reg_class_subset_p (regclass, Q_REGS))
41336 return regclass;
41337 if (reg_class_subset_p (Q_REGS, regclass))
41338 return Q_REGS;
41339 return NO_REGS;
41342 return regclass;
41345 /* Discourage putting floating-point values in SSE registers unless
41346 SSE math is being used, and likewise for the 387 registers. */
41347 static reg_class_t
41348 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41350 machine_mode mode = GET_MODE (x);
41352 /* Restrict the output reload class to the register bank that we are doing
41353 math on. If we would like not to return a subset of CLASS, reject this
41354 alternative: if reload cannot do this, it will still use its choice. */
41355 mode = GET_MODE (x);
41356 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41357 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41359 if (X87_FLOAT_MODE_P (mode))
41361 if (regclass == FP_TOP_SSE_REGS)
41362 return FP_TOP_REG;
41363 else if (regclass == FP_SECOND_SSE_REGS)
41364 return FP_SECOND_REG;
41365 else
41366 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41369 return regclass;
41372 static reg_class_t
41373 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41374 machine_mode mode, secondary_reload_info *sri)
41376 /* Double-word spills from general registers to non-offsettable memory
41377 references (zero-extended addresses) require special handling. */
41378 if (TARGET_64BIT
41379 && MEM_P (x)
41380 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41381 && INTEGER_CLASS_P (rclass)
41382 && !offsettable_memref_p (x))
41384 sri->icode = (in_p
41385 ? CODE_FOR_reload_noff_load
41386 : CODE_FOR_reload_noff_store);
41387 /* Add the cost of moving address to a temporary. */
41388 sri->extra_cost = 1;
41390 return NO_REGS;
41393 /* QImode spills from non-QI registers require an
41394 intermediate register on 32-bit targets. */
41395 if (mode == QImode
41396 && (MAYBE_MASK_CLASS_P (rclass)
41397 || (!TARGET_64BIT && !in_p
41398 && INTEGER_CLASS_P (rclass)
41399 && MAYBE_NON_Q_CLASS_P (rclass))))
41401 int regno;
41403 if (REG_P (x))
41404 regno = REGNO (x);
41405 else
41406 regno = -1;
41408 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41409 regno = true_regnum (x);
41411 /* Return Q_REGS if the operand is in memory. */
41412 if (regno == -1)
41413 return Q_REGS;
41416 /* This condition handles corner case where an expression involving
41417 pointers gets vectorized. We're trying to use the address of a
41418 stack slot as a vector initializer.
41420 (set (reg:V2DI 74 [ vect_cst_.2 ])
41421 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41423 Eventually frame gets turned into sp+offset like this:
41425 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41426 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41427 (const_int 392 [0x188]))))
41429 That later gets turned into:
41431 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41432 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41433 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41435 We'll have the following reload recorded:
41437 Reload 0: reload_in (DI) =
41438 (plus:DI (reg/f:DI 7 sp)
41439 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41440 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41441 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41442 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41443 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41444 reload_reg_rtx: (reg:V2DI 22 xmm1)
41446 Which isn't going to work since SSE instructions can't handle scalar
41447 additions. Returning GENERAL_REGS forces the addition into integer
41448 register and reload can handle subsequent reloads without problems. */
41450 if (in_p && GET_CODE (x) == PLUS
41451 && SSE_CLASS_P (rclass)
41452 && SCALAR_INT_MODE_P (mode))
41453 return GENERAL_REGS;
41455 return NO_REGS;
41458 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41460 static bool
41461 ix86_class_likely_spilled_p (reg_class_t rclass)
41463 switch (rclass)
41465 case AREG:
41466 case DREG:
41467 case CREG:
41468 case BREG:
41469 case AD_REGS:
41470 case SIREG:
41471 case DIREG:
41472 case SSE_FIRST_REG:
41473 case FP_TOP_REG:
41474 case FP_SECOND_REG:
41475 case BND_REGS:
41476 return true;
41478 default:
41479 break;
41482 return false;
41485 /* If we are copying between general and FP registers, we need a memory
41486 location. The same is true for SSE and MMX registers.
41488 To optimize register_move_cost performance, allow an inline variant.
41490 The macro can't work reliably when one of the CLASSES is a class containing
41491 registers from multiple units (SSE, MMX, integer). We avoid this by never
41492 combining those units in a single alternative in the machine description.
41493 Ensure that this constraint holds to avoid unexpected surprises.
41495 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41496 enforce these sanity checks. */
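/* For example, a DFmode move between FLOAT_REGS and SSE_REGS needs a
   memory location, while a move between two general register classes
   does not. */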
41498 static inline bool
41499 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41500 machine_mode mode, int strict)
41502 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41503 return false;
41504 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41505 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41506 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41507 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41508 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41509 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41511 gcc_assert (!strict || lra_in_progress);
41512 return true;
41515 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41516 return true;
41518 /* Between mask and general, we have moves no larger than word size. */
41519 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41520 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41521 return true;
41523 /* ??? This is a lie. We do have moves between mmx/general, and for
41524 mmx/sse2. But by saying we need secondary memory we discourage the
41525 register allocator from using the mmx registers unless needed. */
41526 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41527 return true;
41529 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41531 /* SSE1 doesn't have any direct moves from other classes. */
41532 if (!TARGET_SSE2)
41533 return true;
41535 /* If the target says that inter-unit moves are more expensive
41536 than moving through memory, then don't generate them. */
41537 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41538 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41539 return true;
41541 /* Between SSE and general, we have moves no larger than word size. */
41542 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41543 return true;
41546 return false;
41549 bool
41550 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41551 machine_mode mode, int strict)
41553 return inline_secondary_memory_needed (class1, class2, mode, strict);
41556 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41558 On the 80386, this is the size of MODE in words,
41559 except in the FP regs, where a single reg is always enough. */
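/* For example, on ia32 DImode needs two general registers and XFmode
   needs three, while a single FP register suffices for either. */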
41561 static unsigned char
41562 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41564 if (MAYBE_INTEGER_CLASS_P (rclass))
41566 if (mode == XFmode)
41567 return (TARGET_64BIT ? 2 : 3);
41568 else if (mode == XCmode)
41569 return (TARGET_64BIT ? 4 : 6);
41570 else
41571 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41573 else
41575 if (COMPLEX_MODE_P (mode))
41576 return 2;
41577 else
41578 return 1;
41582 /* Return true if the registers in CLASS cannot represent the change from
41583 modes FROM to TO. */
41585 bool
41586 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41587 enum reg_class regclass)
41589 if (from == to)
41590 return false;
41592 /* x87 registers can't do subreg at all, as all values are reformatted
41593 to extended precision. */
41594 if (MAYBE_FLOAT_CLASS_P (regclass))
41595 return true;
41597 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41599 /* Vector registers do not support QI or HImode loads. If we don't
41600 disallow a change to these modes, reload will assume it's ok to
41601 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41602 the vec_dupv4hi pattern. */
41603 if (GET_MODE_SIZE (from) < 4)
41604 return true;
41607 return false;
41610 /* Return the cost of moving data of mode M between a
41611 register and memory. A value of 2 is the default; this cost is
41612 relative to those in `REGISTER_MOVE_COST'.
41614 This function is used extensively by register_move_cost, which is used to
41615 build tables at startup. Make it inline in this case.
41616 When IN is 2, return the maximum of the in and out move costs.
41618 If moving between registers and memory is more expensive than
41619 between two registers, you should define this macro to express the
41620 relative cost.
41622 Also model the increased cost of moving QImode registers in
41623 non-Q_REGS classes. */
41625 static inline int
41626 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41627 int in)
41629 int cost;
41630 if (FLOAT_CLASS_P (regclass))
41632 int index;
41633 switch (mode)
41635 case SFmode:
41636 index = 0;
41637 break;
41638 case DFmode:
41639 index = 1;
41640 break;
41641 case XFmode:
41642 index = 2;
41643 break;
41644 default:
41645 return 100;
41647 if (in == 2)
41648 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41649 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41651 if (SSE_CLASS_P (regclass))
41653 int index;
41654 switch (GET_MODE_SIZE (mode))
41656 case 4:
41657 index = 0;
41658 break;
41659 case 8:
41660 index = 1;
41661 break;
41662 case 16:
41663 index = 2;
41664 break;
41665 default:
41666 return 100;
41668 if (in == 2)
41669 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41670 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41672 if (MMX_CLASS_P (regclass))
41674 int index;
41675 switch (GET_MODE_SIZE (mode))
41677 case 4:
41678 index = 0;
41679 break;
41680 case 8:
41681 index = 1;
41682 break;
41683 default:
41684 return 100;
41686 if (in == 2)
41687 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41688 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41690 switch (GET_MODE_SIZE (mode))
41692 case 1:
41693 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41695 if (!in)
41696 return ix86_cost->int_store[0];
41697 if (TARGET_PARTIAL_REG_DEPENDENCY
41698 && optimize_function_for_speed_p (cfun))
41699 cost = ix86_cost->movzbl_load;
41700 else
41701 cost = ix86_cost->int_load[0];
41702 if (in == 2)
41703 return MAX (cost, ix86_cost->int_store[0]);
41704 return cost;
41706 else
41708 if (in == 2)
41709 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41710 if (in)
41711 return ix86_cost->movzbl_load;
41712 else
41713 return ix86_cost->int_store[0] + 4;
41715 break;
41716 case 2:
41717 if (in == 2)
41718 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41719 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41720 default:
41721 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41722 if (mode == TFmode)
41723 mode = XFmode;
41724 if (in == 2)
41725 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41726 else if (in)
41727 cost = ix86_cost->int_load[2];
41728 else
41729 cost = ix86_cost->int_store[2];
41730 return (cost * (((int) GET_MODE_SIZE (mode)
41731 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
41735 static int
41736 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41737 bool in)
41739 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41743 /* Return the cost of moving data from a register in class CLASS1 to
41744 one in class CLASS2.
41746 It is not required that the cost always equal 2 when FROM is the same as TO;
41747 on some machines it is expensive to move between registers if they are not
41748 general registers. */
41750 static int
41751 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41752 reg_class_t class2_i)
41754 enum reg_class class1 = (enum reg_class) class1_i;
41755 enum reg_class class2 = (enum reg_class) class2_i;
41757 /* In case we require secondary memory, compute cost of the store followed
41758 by load. In order to avoid bad register allocation choices, we need
41759 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
41761 if (inline_secondary_memory_needed (class1, class2, mode, 0))
41763 int cost = 1;
41765 cost += inline_memory_move_cost (mode, class1, 2);
41766 cost += inline_memory_move_cost (mode, class2, 2);
41768 /* In case of copying from a general purpose register we may emit multiple
41769 stores followed by a single load, causing a memory size mismatch stall.
41770 Count this as an arbitrarily high cost of 20. */
41771 if (targetm.class_max_nregs (class1, mode)
41772 > targetm.class_max_nregs (class2, mode))
41773 cost += 20;
41775 /* In the case of FP/MMX moves, the registers actually overlap, and we
41776 have to switch modes in order to treat them differently. */
41777 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41778 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41779 cost += 20;
41781 return cost;
41784 /* Moves between SSE/MMX and integer unit are expensive. */
41785 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41786 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41788 /* ??? By keeping the returned value relatively high, we limit the number
41789 of moves between integer and MMX/SSE registers for all targets.
41790 Additionally, a high value prevents a problem with x86_modes_tieable_p (),
41791 where integer modes in MMX/SSE registers are not tieable
41792 because of missing QImode and HImode moves to, from or between
41793 MMX/SSE registers. */
41794 return MAX (8, ix86_cost->mmxsse_to_integer);
41796 if (MAYBE_FLOAT_CLASS_P (class1))
41797 return ix86_cost->fp_move;
41798 if (MAYBE_SSE_CLASS_P (class1))
41799 return ix86_cost->sse_move;
41800 if (MAYBE_MMX_CLASS_P (class1))
41801 return ix86_cost->mmx_move;
41802 return 2;
41805 /* Return TRUE if hard register REGNO can hold a value of machine-mode
41806 MODE. */
41808 bool
41809 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
41811 /* Only the flags register can hold CCmode values, and it can hold nothing else. */
41812 if (CC_REGNO_P (regno))
41813 return GET_MODE_CLASS (mode) == MODE_CC;
41814 if (GET_MODE_CLASS (mode) == MODE_CC
41815 || GET_MODE_CLASS (mode) == MODE_RANDOM
41816 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
41817 return false;
41818 if (STACK_REGNO_P (regno))
41819 return VALID_FP_MODE_P (mode);
41820 if (MASK_REGNO_P (regno))
41821 return (VALID_MASK_REG_MODE (mode)
41822 || (TARGET_AVX512BW
41823 && VALID_MASK_AVX512BW_MODE (mode)));
41824 if (BND_REGNO_P (regno))
41825 return VALID_BND_REG_MODE (mode);
41826 if (SSE_REGNO_P (regno))
41828 /* We implement the move patterns for all vector modes into and
41829 out of SSE registers, even when no operation instructions
41830 are available. */
41832 /* For AVX-512 we allow, regardless of regno:
41833 - XI mode
41834 - any of 512-bit wide vector mode
41835 - any scalar mode. */
41836 if (TARGET_AVX512F
41837 && (mode == XImode
41838 || VALID_AVX512F_REG_MODE (mode)
41839 || VALID_AVX512F_SCALAR_MODE (mode)))
41840 return true;
41842 /* TODO check for QI/HI scalars. */
41843 /* AVX512VL allows SSE registers 16+ for 128/256-bit modes. */
41844 if (TARGET_AVX512VL
41845 && (mode == OImode
41846 || mode == TImode
41847 || VALID_AVX256_REG_MODE (mode)
41848 || VALID_AVX512VL_128_REG_MODE (mode)))
41849 return true;
41851 /* xmm16-xmm31 are only available for AVX-512. */
41852 if (EXT_REX_SSE_REGNO_P (regno))
41853 return false;
41855 /* OImode and AVX modes are available only when AVX is enabled. */
41856 return ((TARGET_AVX
41857 && VALID_AVX256_REG_OR_OI_MODE (mode))
41858 || VALID_SSE_REG_MODE (mode)
41859 || VALID_SSE2_REG_MODE (mode)
41860 || VALID_MMX_REG_MODE (mode)
41861 || VALID_MMX_REG_MODE_3DNOW (mode));
41863 if (MMX_REGNO_P (regno))
41865 /* We implement the move patterns for 3DNOW modes even in MMX mode,
41866 so if the register is available at all, then we can move data of
41867 the given mode into or out of it. */
41868 return (VALID_MMX_REG_MODE (mode)
41869 || VALID_MMX_REG_MODE_3DNOW (mode));
41872 if (mode == QImode)
41874 /* Take care for QImode values - they can be in non-QI regs,
41875 but then they do cause partial register stalls. */
41876 if (ANY_QI_REGNO_P (regno))
41877 return true;
41878 if (!TARGET_PARTIAL_REG_STALL)
41879 return true;
41880 /* LRA checks if the hard register is OK for the given mode.
41881 QImode values can live in non-QI regs, so we allow all
41882 registers here. */
41883 if (lra_in_progress)
41884 return true;
41885 return !can_create_pseudo_p ();
41887 /* We handle both integer and floats in the general purpose registers. */
41888 else if (VALID_INT_MODE_P (mode))
41889 return true;
41890 else if (VALID_FP_MODE_P (mode))
41891 return true;
41892 else if (VALID_DFP_MODE_P (mode))
41893 return true;
41894 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
41895 on to use that value in smaller contexts, this can easily force a
41896 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
41897 supporting DImode, allow it. */
41898 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
41899 return true;
41901 return false;
41904 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
41905 tieable integer mode. */
41907 static bool
41908 ix86_tieable_integer_mode_p (machine_mode mode)
41910 switch (mode)
41912 case HImode:
41913 case SImode:
41914 return true;
41916 case QImode:
41917 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
41919 case DImode:
41920 return TARGET_64BIT;
41922 default:
41923 return false;
41927 /* Return true if MODE1 is accessible in a register that can hold MODE2
41928 without copying. That is, all register classes that can hold MODE2
41929 can also hold MODE1. */
41931 bool
41932 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
41934 if (mode1 == mode2)
41935 return true;
41937 if (ix86_tieable_integer_mode_p (mode1)
41938 && ix86_tieable_integer_mode_p (mode2))
41939 return true;
41941 /* MODE2 being XFmode implies fp stack or general regs, which means we
41942 can tie any smaller floating point modes to it. Note that we do not
41943 tie this with TFmode. */
41944 if (mode2 == XFmode)
41945 return mode1 == SFmode || mode1 == DFmode;
41947 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
41948 that we can tie it with SFmode. */
41949 if (mode2 == DFmode)
41950 return mode1 == SFmode;
41952 /* If MODE2 is only appropriate for an SSE register, then tie with
41953 any other mode acceptable to SSE registers. */
41954 if (GET_MODE_SIZE (mode2) == 32
41955 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41956 return (GET_MODE_SIZE (mode1) == 32
41957 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41958 if (GET_MODE_SIZE (mode2) == 16
41959 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41960 return (GET_MODE_SIZE (mode1) == 16
41961 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41963 /* If MODE2 is appropriate for an MMX register, then tie
41964 with any other mode acceptable to MMX registers. */
41965 if (GET_MODE_SIZE (mode2) == 8
41966 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
41967 return (GET_MODE_SIZE (mode1) == 8
41968 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
41970 return false;
41973 /* Return the cost of moving between two registers of mode MODE. */
41975 static int
41976 ix86_set_reg_reg_cost (machine_mode mode)
41978 unsigned int units = UNITS_PER_WORD;
41980 switch (GET_MODE_CLASS (mode))
41982 default:
41983 break;
41985 case MODE_CC:
41986 units = GET_MODE_SIZE (CCmode);
41987 break;
41989 case MODE_FLOAT:
41990 if ((TARGET_SSE && mode == TFmode)
41991 || (TARGET_80387 && mode == XFmode)
41992 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
41993 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
41994 units = GET_MODE_SIZE (mode);
41995 break;
41997 case MODE_COMPLEX_FLOAT:
41998 if ((TARGET_SSE && mode == TCmode)
41999 || (TARGET_80387 && mode == XCmode)
42000 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
42001 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
42002 units = GET_MODE_SIZE (mode);
42003 break;
42005 case MODE_VECTOR_INT:
42006 case MODE_VECTOR_FLOAT:
42007 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
42008 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
42009 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
42010 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
42011 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
42012 units = GET_MODE_SIZE (mode);
42015 /* Return the cost of moving between two registers of mode MODE,
42016 assuming that the move will be in pieces of at most UNITS bytes. */
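      /* Worked example: a 32-byte value moved in UNITS == 8 byte pieces
	 costs COSTS_N_INSNS ((32 + 8 - 1) / 8) == COSTS_N_INSNS (4),
	 i.e. one insn per word-sized piece.  */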
42017 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
42020 /* Compute a (partial) cost for rtx X. Return true if the complete
42021 cost has been computed, and false if subexpressions should be
42022 scanned. In either case, *TOTAL contains the cost result. */
42024 static bool
42025 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
42026 bool speed)
42028 rtx mask;
42029 enum rtx_code code = (enum rtx_code) code_i;
42030 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
42031 machine_mode mode = GET_MODE (x);
42032 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
42034 switch (code)
42036 case SET:
42037 if (register_operand (SET_DEST (x), VOIDmode)
42038 && reg_or_0_operand (SET_SRC (x), VOIDmode))
42040 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
42041 return true;
42043 return false;
42045 case CONST_INT:
42046 case CONST:
42047 case LABEL_REF:
42048 case SYMBOL_REF:
42049 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
42050 *total = 3;
42051 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
42052 *total = 2;
42053 else if (flag_pic && SYMBOLIC_CONST (x)
42054 && !(TARGET_64BIT
42055 && (GET_CODE (x) == LABEL_REF
42056 || (GET_CODE (x) == SYMBOL_REF
42057 && SYMBOL_REF_LOCAL_P (x)))))
42058 *total = 1;
42059 else
42060 *total = 0;
42061 return true;
42063 case CONST_DOUBLE:
42064 if (mode == VOIDmode)
42066 *total = 0;
42067 return true;
42069 switch (standard_80387_constant_p (x))
42071 case 1: /* 0.0 */
42072 *total = 1;
42073 return true;
42074 default: /* Other constants */
42075 *total = 2;
42076 return true;
42077 case 0:
42078 case -1:
42079 break;
42081 if (SSE_FLOAT_MODE_P (mode))
42083 case CONST_VECTOR:
42084 switch (standard_sse_constant_p (x))
42086 case 0:
42087 break;
42088 case 1: /* 0: xor eliminates false dependency */
42089 *total = 0;
42090 return true;
42091 default: /* -1: cmp contains false dependency */
42092 *total = 1;
42093 return true;
42096 /* Fall back to (MEM (SYMBOL_REF)), since that's where
42097 it'll probably end up. Add a penalty for size. */
42098 *total = (COSTS_N_INSNS (1)
42099 + (flag_pic != 0 && !TARGET_64BIT)
42100 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
42101 return true;
42103 case ZERO_EXTEND:
42104       /* Zero extension is often completely free on x86_64, so make
42105 	 it as cheap as possible.  */
42106 if (TARGET_64BIT && mode == DImode
42107 && GET_MODE (XEXP (x, 0)) == SImode)
42108 *total = 1;
42109 else if (TARGET_ZERO_EXTEND_WITH_AND)
42110 *total = cost->add;
42111 else
42112 *total = cost->movzx;
42113 return false;
42115 case SIGN_EXTEND:
42116 *total = cost->movsx;
42117 return false;
42119 case ASHIFT:
42120 if (SCALAR_INT_MODE_P (mode)
42121 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
42122 && CONST_INT_P (XEXP (x, 1)))
42124 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42125 if (value == 1)
42127 *total = cost->add;
42128 return false;
42130 if ((value == 2 || value == 3)
42131 && cost->lea <= cost->shift_const)
42133 *total = cost->lea;
42134 return false;
42137 /* FALLTHRU */
42139 case ROTATE:
42140 case ASHIFTRT:
42141 case LSHIFTRT:
42142 case ROTATERT:
42143 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42145 /* ??? Should be SSE vector operation cost. */
42146 /* At least for published AMD latencies, this really is the same
42147 as the latency for a simple fpu operation like fabs. */
42148 /* V*QImode is emulated with 1-11 insns. */
42149 if (mode == V16QImode || mode == V32QImode)
42151 int count = 11;
42152 if (TARGET_XOP && mode == V16QImode)
42154 	      /* For XOP we use vpshab, which requires a broadcast of the
42155 		 value to the variable shift insn.  For constants this
42156 		 means a V16QI constant in memory; even when we could do the
42157 		 shift with one insn, set the cost so that paddb is preferred.  */
42158 if (CONSTANT_P (XEXP (x, 1)))
42160 *total = (cost->fabs
42161 + rtx_cost (XEXP (x, 0), code, 0, speed)
42162 + (speed ? 2 : COSTS_N_BYTES (16)));
42163 return true;
42165 count = 3;
42167 else if (TARGET_SSSE3)
42168 count = 7;
42169 *total = cost->fabs * count;
42171 else
42172 *total = cost->fabs;
42174 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42176 if (CONST_INT_P (XEXP (x, 1)))
42178 if (INTVAL (XEXP (x, 1)) > 32)
42179 *total = cost->shift_const + COSTS_N_INSNS (2);
42180 else
42181 *total = cost->shift_const * 2;
42183 else
42185 if (GET_CODE (XEXP (x, 1)) == AND)
42186 *total = cost->shift_var * 2;
42187 else
42188 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42191 else
42193 if (CONST_INT_P (XEXP (x, 1)))
42194 *total = cost->shift_const;
42195 else if (GET_CODE (XEXP (x, 1)) == SUBREG
42196 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42198 /* Return the cost after shift-and truncation. */
42199 *total = cost->shift_var;
42200 return true;
42202 else
42203 *total = cost->shift_var;
42205 return false;
42207 case FMA:
42209 rtx sub;
42211 gcc_assert (FLOAT_MODE_P (mode));
42212 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42214 /* ??? SSE scalar/vector cost should be used here. */
42215 /* ??? Bald assumption that fma has the same cost as fmul. */
42216 *total = cost->fmul;
42217 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42219 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
42220 sub = XEXP (x, 0);
42221 if (GET_CODE (sub) == NEG)
42222 sub = XEXP (sub, 0);
42223 *total += rtx_cost (sub, FMA, 0, speed);
42225 sub = XEXP (x, 2);
42226 if (GET_CODE (sub) == NEG)
42227 sub = XEXP (sub, 0);
42228 *total += rtx_cost (sub, FMA, 2, speed);
42229 return true;
42232 case MULT:
42233 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42235 /* ??? SSE scalar cost should be used here. */
42236 *total = cost->fmul;
42237 return false;
42239 else if (X87_FLOAT_MODE_P (mode))
42241 *total = cost->fmul;
42242 return false;
42244 else if (FLOAT_MODE_P (mode))
42246 /* ??? SSE vector cost should be used here. */
42247 *total = cost->fmul;
42248 return false;
42250 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42252 /* V*QImode is emulated with 7-13 insns. */
42253 if (mode == V16QImode || mode == V32QImode)
42255 int extra = 11;
42256 if (TARGET_XOP && mode == V16QImode)
42257 extra = 5;
42258 else if (TARGET_SSSE3)
42259 extra = 6;
42260 *total = cost->fmul * 2 + cost->fabs * extra;
42262 /* V*DImode is emulated with 5-8 insns. */
42263 else if (mode == V2DImode || mode == V4DImode)
42265 if (TARGET_XOP && mode == V2DImode)
42266 *total = cost->fmul * 2 + cost->fabs * 3;
42267 else
42268 *total = cost->fmul * 3 + cost->fabs * 5;
42270 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42271 insns, including two PMULUDQ. */
42272 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42273 *total = cost->fmul * 2 + cost->fabs * 5;
42274 else
42275 *total = cost->fmul;
42276 return false;
42278 else
42280 rtx op0 = XEXP (x, 0);
42281 rtx op1 = XEXP (x, 1);
42282 int nbits;
42283 if (CONST_INT_P (XEXP (x, 1)))
42285 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42286 for (nbits = 0; value != 0; value &= value - 1)
42287 nbits++;
42289 else
42290 /* This is arbitrary. */
42291 nbits = 7;
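	  /* NBITS is now the population count of the multiplier (the loop
	     above clears the lowest set bit each iteration), or an arbitrary
	     7 for a non-constant multiplier; the per-bit term below scales
	     the multiply cost with it.  */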
42293 /* Compute costs correctly for widening multiplication. */
42294 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42295 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42296 == GET_MODE_SIZE (mode))
42298 int is_mulwiden = 0;
42299 machine_mode inner_mode = GET_MODE (op0);
42301 if (GET_CODE (op0) == GET_CODE (op1))
42302 is_mulwiden = 1, op1 = XEXP (op1, 0);
42303 else if (CONST_INT_P (op1))
42305 if (GET_CODE (op0) == SIGN_EXTEND)
42306 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42307 == INTVAL (op1);
42308 else
42309 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42312 if (is_mulwiden)
42313 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42316 *total = (cost->mult_init[MODE_INDEX (mode)]
42317 + nbits * cost->mult_bit
42318 + rtx_cost (op0, outer_code, opno, speed)
42319 + rtx_cost (op1, outer_code, opno, speed));
42321 return true;
42324 case DIV:
42325 case UDIV:
42326 case MOD:
42327 case UMOD:
42328 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42329 /* ??? SSE cost should be used here. */
42330 *total = cost->fdiv;
42331 else if (X87_FLOAT_MODE_P (mode))
42332 *total = cost->fdiv;
42333 else if (FLOAT_MODE_P (mode))
42334 /* ??? SSE vector cost should be used here. */
42335 *total = cost->fdiv;
42336 else
42337 *total = cost->divide[MODE_INDEX (mode)];
42338 return false;
42340 case PLUS:
42341 if (GET_MODE_CLASS (mode) == MODE_INT
42342 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42344 if (GET_CODE (XEXP (x, 0)) == PLUS
42345 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42346 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42347 && CONSTANT_P (XEXP (x, 1)))
42349 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
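	      /* Scale factors of 2, 4 and 8 fit the x86 addressing mode, so
		 base + index*scale + displacement collapses into one LEA.  */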
42350 if (val == 2 || val == 4 || val == 8)
42352 *total = cost->lea;
42353 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42354 outer_code, opno, speed);
42355 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42356 outer_code, opno, speed);
42357 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42358 return true;
42361 else if (GET_CODE (XEXP (x, 0)) == MULT
42362 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42364 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42365 if (val == 2 || val == 4 || val == 8)
42367 *total = cost->lea;
42368 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42369 outer_code, opno, speed);
42370 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42371 return true;
42374 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42376 *total = cost->lea;
42377 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42378 outer_code, opno, speed);
42379 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42380 outer_code, opno, speed);
42381 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42382 return true;
42385 /* FALLTHRU */
42387 case MINUS:
42388 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42390 /* ??? SSE cost should be used here. */
42391 *total = cost->fadd;
42392 return false;
42394 else if (X87_FLOAT_MODE_P (mode))
42396 *total = cost->fadd;
42397 return false;
42399 else if (FLOAT_MODE_P (mode))
42401 /* ??? SSE vector cost should be used here. */
42402 *total = cost->fadd;
42403 return false;
42405 /* FALLTHRU */
42407 case AND:
42408 case IOR:
42409 case XOR:
42410 if (GET_MODE_CLASS (mode) == MODE_INT
42411 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42413 *total = (cost->add * 2
42414 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42415 << (GET_MODE (XEXP (x, 0)) != DImode))
42416 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42417 << (GET_MODE (XEXP (x, 1)) != DImode)));
42418 return true;
42420 /* FALLTHRU */
42422 case NEG:
42423 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42425 /* ??? SSE cost should be used here. */
42426 *total = cost->fchs;
42427 return false;
42429 else if (X87_FLOAT_MODE_P (mode))
42431 *total = cost->fchs;
42432 return false;
42434 else if (FLOAT_MODE_P (mode))
42436 /* ??? SSE vector cost should be used here. */
42437 *total = cost->fchs;
42438 return false;
42440 /* FALLTHRU */
42442 case NOT:
42443 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42445 /* ??? Should be SSE vector operation cost. */
42446 /* At least for published AMD latencies, this really is the same
42447 as the latency for a simple fpu operation like fabs. */
42448 *total = cost->fabs;
42450 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42451 *total = cost->add * 2;
42452 else
42453 *total = cost->add;
42454 return false;
42456 case COMPARE:
42457 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42458 && XEXP (XEXP (x, 0), 1) == const1_rtx
42459 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42460 && XEXP (x, 1) == const0_rtx)
42462 /* This kind of construct is implemented using test[bwl].
42463 Treat it as if we had an AND. */
42464 *total = (cost->add
42465 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42466 + rtx_cost (const1_rtx, outer_code, opno, speed));
42467 return true;
42469 return false;
42471 case FLOAT_EXTEND:
42472 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42473 *total = 0;
42474 return false;
42476 case ABS:
42477 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42478 /* ??? SSE cost should be used here. */
42479 *total = cost->fabs;
42480 else if (X87_FLOAT_MODE_P (mode))
42481 *total = cost->fabs;
42482 else if (FLOAT_MODE_P (mode))
42483 /* ??? SSE vector cost should be used here. */
42484 *total = cost->fabs;
42485 return false;
42487 case SQRT:
42488 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42489 /* ??? SSE cost should be used here. */
42490 *total = cost->fsqrt;
42491 else if (X87_FLOAT_MODE_P (mode))
42492 *total = cost->fsqrt;
42493 else if (FLOAT_MODE_P (mode))
42494 /* ??? SSE vector cost should be used here. */
42495 *total = cost->fsqrt;
42496 return false;
42498 case UNSPEC:
42499 if (XINT (x, 1) == UNSPEC_TP)
42500 *total = 0;
42501 return false;
42503 case VEC_SELECT:
42504 case VEC_CONCAT:
42505 case VEC_DUPLICATE:
42506       /* ??? Assume all of these vector manipulation patterns are
42507 	 recognizable, in which case they all have pretty much the
42508 	 same cost.  */
42509 *total = cost->fabs;
42510 return true;
42511 case VEC_MERGE:
42512 mask = XEXP (x, 2);
42513       /* This is a masked instruction; assume the same cost
42514 	 as the non-masked variant.  */
42515 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42516 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42517 else
42518 *total = cost->fabs;
42519 return true;
42521 default:
42522 return false;
42526 #if TARGET_MACHO
42528 static int current_machopic_label_num;
42530 /* Given a symbol name and its associated stub, write out the
42531 definition of the stub. */
42533 void
42534 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42536 unsigned int length;
42537 char *binder_name, *symbol_name, lazy_ptr_name[32];
42538 int label = ++current_machopic_label_num;
42540 /* For 64-bit we shouldn't get here. */
42541 gcc_assert (!TARGET_64BIT);
42543 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42544 symb = targetm.strip_name_encoding (symb);
42546 length = strlen (stub);
42547 binder_name = XALLOCAVEC (char, length + 32);
42548 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42550 length = strlen (symb);
42551 symbol_name = XALLOCAVEC (char, length + 32);
42552 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42554 sprintf (lazy_ptr_name, "L%d$lz", label);
42556 if (MACHOPIC_ATT_STUB)
42557 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42558 else if (MACHOPIC_PURE)
42559 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42560 else
42561 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42563 fprintf (file, "%s:\n", stub);
42564 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42566 if (MACHOPIC_ATT_STUB)
42568 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42570 else if (MACHOPIC_PURE)
42572 /* PIC stub. */
42573 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42574 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42575 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42576 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42577 label, lazy_ptr_name, label);
42578 fprintf (file, "\tjmp\t*%%ecx\n");
42580 else
42581 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42583 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42584 it needs no stub-binding-helper. */
42585 if (MACHOPIC_ATT_STUB)
42586 return;
42588 fprintf (file, "%s:\n", binder_name);
42590 if (MACHOPIC_PURE)
42592 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42593 fprintf (file, "\tpushl\t%%ecx\n");
42595 else
42596 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42598 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42600 /* N.B. Keep the correspondence of these
42601 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42602 old-pic/new-pic/non-pic stubs; altering this will break
42603 compatibility with existing dylibs. */
42604 if (MACHOPIC_PURE)
42606 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42607 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42609 else
42610 /* 16-byte -mdynamic-no-pic stub. */
42611     switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
42613 fprintf (file, "%s:\n", lazy_ptr_name);
42614 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42615 fprintf (file, ASM_LONG "%s\n", binder_name);
42617 #endif /* TARGET_MACHO */
42619 /* Order the registers for register allocator. */
42621 void
42622 x86_order_regs_for_local_alloc (void)
42624 int pos = 0;
42625 int i;
42627 /* First allocate the local general purpose registers. */
42628 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42629 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42630 reg_alloc_order [pos++] = i;
42632 /* Global general purpose registers. */
42633 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42634 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42635 reg_alloc_order [pos++] = i;
42637 /* x87 registers come first in case we are doing FP math
42638 using them. */
42639 if (!TARGET_SSE_MATH)
42640 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42641 reg_alloc_order [pos++] = i;
42643 /* SSE registers. */
42644 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42645 reg_alloc_order [pos++] = i;
42646 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42647 reg_alloc_order [pos++] = i;
42649 /* Extended REX SSE registers. */
42650 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42651 reg_alloc_order [pos++] = i;
42653    /* Mask registers.  */
42654 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42655 reg_alloc_order [pos++] = i;
42657 /* MPX bound registers. */
42658 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42659 reg_alloc_order [pos++] = i;
42661 /* x87 registers. */
42662 if (TARGET_SSE_MATH)
42663 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42664 reg_alloc_order [pos++] = i;
42666 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42667 reg_alloc_order [pos++] = i;
42669    /* Initialize the rest of the array, as we do not allocate some
42670       registers at all.  */
42671 while (pos < FIRST_PSEUDO_REGISTER)
42672 reg_alloc_order [pos++] = 0;
42675 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42676 in struct attribute_spec handler. */
42677 static tree
42678 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42679 tree args,
42680 int,
42681 bool *no_add_attrs)
42683 if (TREE_CODE (*node) != FUNCTION_TYPE
42684 && TREE_CODE (*node) != METHOD_TYPE
42685 && TREE_CODE (*node) != FIELD_DECL
42686 && TREE_CODE (*node) != TYPE_DECL)
42688 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42689 name);
42690 *no_add_attrs = true;
42691 return NULL_TREE;
42693 if (TARGET_64BIT)
42695 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42696 name);
42697 *no_add_attrs = true;
42698 return NULL_TREE;
42700 if (is_attribute_p ("callee_pop_aggregate_return", name))
42702 tree cst;
42704 cst = TREE_VALUE (args);
42705 if (TREE_CODE (cst) != INTEGER_CST)
42707 warning (OPT_Wattributes,
42708 "%qE attribute requires an integer constant argument",
42709 name);
42710 *no_add_attrs = true;
42712 else if (compare_tree_int (cst, 0) != 0
42713 && compare_tree_int (cst, 1) != 0)
42715 warning (OPT_Wattributes,
42716 "argument to %qE attribute is neither zero, nor one",
42717 name);
42718 *no_add_attrs = true;
42721 return NULL_TREE;
42724 return NULL_TREE;
42727 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
42728 struct attribute_spec.handler. */
42729 static tree
42730 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42731 bool *no_add_attrs)
42733 if (TREE_CODE (*node) != FUNCTION_TYPE
42734 && TREE_CODE (*node) != METHOD_TYPE
42735 && TREE_CODE (*node) != FIELD_DECL
42736 && TREE_CODE (*node) != TYPE_DECL)
42738 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42739 name);
42740 *no_add_attrs = true;
42741 return NULL_TREE;
42744 /* Can combine regparm with all attributes but fastcall. */
42745 if (is_attribute_p ("ms_abi", name))
42747 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42749 error ("ms_abi and sysv_abi attributes are not compatible");
42752 return NULL_TREE;
42754 else if (is_attribute_p ("sysv_abi", name))
42756 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42758 error ("ms_abi and sysv_abi attributes are not compatible");
42761 return NULL_TREE;
42764 return NULL_TREE;
42767 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42768 struct attribute_spec.handler. */
42769 static tree
42770 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
42771 bool *no_add_attrs)
42773 tree *type = NULL;
42774 if (DECL_P (*node))
42776 if (TREE_CODE (*node) == TYPE_DECL)
42777 type = &TREE_TYPE (*node);
42779 else
42780 type = node;
42782 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
42784 warning (OPT_Wattributes, "%qE attribute ignored",
42785 name);
42786 *no_add_attrs = true;
42789 else if ((is_attribute_p ("ms_struct", name)
42790 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
42791 || ((is_attribute_p ("gcc_struct", name)
42792 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
42794 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
42795 name);
42796 *no_add_attrs = true;
42799 return NULL_TREE;
42802 static tree
42803 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
42804 bool *no_add_attrs)
42806 if (TREE_CODE (*node) != FUNCTION_DECL)
42808 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42809 name);
42810 *no_add_attrs = true;
42812 return NULL_TREE;
42815 static bool
42816 ix86_ms_bitfield_layout_p (const_tree record_type)
42818 return ((TARGET_MS_BITFIELD_LAYOUT
42819 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
42820 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
42823 /* Returns an expression indicating where the this parameter is
42824 located on entry to the FUNCTION. */
42826 static rtx
42827 x86_this_parameter (tree function)
42829 tree type = TREE_TYPE (function);
42830 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
42831 int nregs;
42833 if (TARGET_64BIT)
42835 const int *parm_regs;
42837 if (ix86_function_type_abi (type) == MS_ABI)
42838 parm_regs = x86_64_ms_abi_int_parameter_registers;
42839 else
42840 parm_regs = x86_64_int_parameter_registers;
42841 return gen_rtx_REG (Pmode, parm_regs[aggr]);
42844 nregs = ix86_function_regparm (type, function);
42846 if (nregs > 0 && !stdarg_p (type))
42848 int regno;
42849 unsigned int ccvt = ix86_get_callcvt (type);
42851 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42852 regno = aggr ? DX_REG : CX_REG;
42853 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42855 regno = CX_REG;
42856 if (aggr)
42857 return gen_rtx_MEM (SImode,
42858 plus_constant (Pmode, stack_pointer_rtx, 4));
42860 else
42862 regno = AX_REG;
42863 if (aggr)
42865 regno = DX_REG;
42866 if (nregs == 1)
42867 return gen_rtx_MEM (SImode,
42868 plus_constant (Pmode,
42869 stack_pointer_rtx, 4));
42872 return gen_rtx_REG (SImode, regno);
42875 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
42876 aggr ? 8 : 4));
42879 /* Determine whether x86_output_mi_thunk can succeed. */
42881 static bool
42882 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
42883 const_tree function)
42885 /* 64-bit can handle anything. */
42886 if (TARGET_64BIT)
42887 return true;
42889 /* For 32-bit, everything's fine if we have one free register. */
42890 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
42891 return true;
42893 /* Need a free register for vcall_offset. */
42894 if (vcall_offset)
42895 return false;
42897 /* Need a free register for GOT references. */
42898 if (flag_pic && !targetm.binds_local_p (function))
42899 return false;
42901 /* Otherwise ok. */
42902 return true;
42905 /* Output the assembler code for a thunk function. THUNK_DECL is the
42906 declaration for the thunk function itself, FUNCTION is the decl for
42907 the target function. DELTA is an immediate constant offset to be
42908 added to THIS. If VCALL_OFFSET is nonzero, the word at
42909 *(*this + vcall_offset) should be added to THIS. */
42911 static void
42912 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
42913 HOST_WIDE_INT vcall_offset, tree function)
42915 rtx this_param = x86_this_parameter (function);
42916 rtx this_reg, tmp, fnaddr;
42917 unsigned int tmp_regno;
42918 rtx_insn *insn;
42920 if (TARGET_64BIT)
42921 tmp_regno = R10_REG;
42922 else
42924 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
42925 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42926 tmp_regno = AX_REG;
42927 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42928 tmp_regno = DX_REG;
42929 else
42930 tmp_regno = CX_REG;
42933 emit_note (NOTE_INSN_PROLOGUE_END);
42935 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
42936 pull it in now and let DELTA benefit. */
42937 if (REG_P (this_param))
42938 this_reg = this_param;
42939 else if (vcall_offset)
42941 /* Put the this parameter into %eax. */
42942 this_reg = gen_rtx_REG (Pmode, AX_REG);
42943 emit_move_insn (this_reg, this_param);
42945 else
42946 this_reg = NULL_RTX;
42948 /* Adjust the this parameter by a fixed constant. */
42949 if (delta)
42951 rtx delta_rtx = GEN_INT (delta);
42952 rtx delta_dst = this_reg ? this_reg : this_param;
42954 if (TARGET_64BIT)
42956 if (!x86_64_general_operand (delta_rtx, Pmode))
42958 tmp = gen_rtx_REG (Pmode, tmp_regno);
42959 emit_move_insn (tmp, delta_rtx);
42960 delta_rtx = tmp;
42964 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
42967 /* Adjust the this parameter by a value stored in the vtable. */
42968 if (vcall_offset)
42970 rtx vcall_addr, vcall_mem, this_mem;
42972 tmp = gen_rtx_REG (Pmode, tmp_regno);
42974 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
42975 if (Pmode != ptr_mode)
42976 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
42977 emit_move_insn (tmp, this_mem);
42979 /* Adjust the this parameter. */
42980 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
42981 if (TARGET_64BIT
42982 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
42984 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
42985 emit_move_insn (tmp2, GEN_INT (vcall_offset));
42986 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
42989 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
42990 if (Pmode != ptr_mode)
42991 emit_insn (gen_addsi_1_zext (this_reg,
42992 gen_rtx_REG (ptr_mode,
42993 REGNO (this_reg)),
42994 vcall_mem));
42995 else
42996 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
42999 /* If necessary, drop THIS back to its stack slot. */
43000 if (this_reg && this_reg != this_param)
43001 emit_move_insn (this_param, this_reg);
43003 fnaddr = XEXP (DECL_RTL (function), 0);
43004 if (TARGET_64BIT)
43006 if (!flag_pic || targetm.binds_local_p (function)
43007 || TARGET_PECOFF)
43009 else
43011 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
43012 tmp = gen_rtx_CONST (Pmode, tmp);
43013 fnaddr = gen_const_mem (Pmode, tmp);
43016 else
43018 if (!flag_pic || targetm.binds_local_p (function))
43020 #if TARGET_MACHO
43021 else if (TARGET_MACHO)
43023 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
43024 fnaddr = XEXP (fnaddr, 0);
43026 #endif /* TARGET_MACHO */
43027 else
43029 tmp = gen_rtx_REG (Pmode, CX_REG);
43030 output_set_got (tmp, NULL_RTX);
43032 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
43033 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
43034 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
43035 fnaddr = gen_const_mem (Pmode, fnaddr);
43039 /* Our sibling call patterns do not allow memories, because we have no
43040 predicate that can distinguish between frame and non-frame memory.
43041 For our purposes here, we can get away with (ab)using a jump pattern,
43042 because we're going to do no optimization. */
43043 if (MEM_P (fnaddr))
43045 if (sibcall_insn_operand (fnaddr, word_mode))
43047 fnaddr = XEXP (DECL_RTL (function), 0);
43048 tmp = gen_rtx_MEM (QImode, fnaddr);
43049 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43050 tmp = emit_call_insn (tmp);
43051 SIBLING_CALL_P (tmp) = 1;
43053 else
43054 emit_jump_insn (gen_indirect_jump (fnaddr));
43056 else
43058 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
43060 	  // CM_LARGE_PIC always uses a pseudo PIC register, which is
43061 	  // uninitialized.  Since FUNCTION is local and calling it
43062 	  // doesn't go through the PLT, we use scratch register %r11 as
43063 	  // the PIC register and initialize it here.
43064 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
43065 ix86_init_large_pic_reg (tmp_regno);
43066 fnaddr = legitimize_pic_address (fnaddr,
43067 gen_rtx_REG (Pmode, tmp_regno));
43070 if (!sibcall_insn_operand (fnaddr, word_mode))
43072 tmp = gen_rtx_REG (word_mode, tmp_regno);
43073 if (GET_MODE (fnaddr) != word_mode)
43074 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
43075 emit_move_insn (tmp, fnaddr);
43076 fnaddr = tmp;
43079 tmp = gen_rtx_MEM (QImode, fnaddr);
43080 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43081 tmp = emit_call_insn (tmp);
43082 SIBLING_CALL_P (tmp) = 1;
43084 emit_barrier ();
43086 /* Emit just enough of rest_of_compilation to get the insns emitted.
43087 Note that use_thunk calls assemble_start_function et al. */
43088 insn = get_insns ();
43089 shorten_branches (insn);
43090 final_start_function (insn, file, 1);
43091 final (insn, file, 1);
43092 final_end_function ();
43095 static void
43096 x86_file_start (void)
43098 default_file_start ();
43099 if (TARGET_16BIT)
43100 fputs ("\t.code16gcc\n", asm_out_file);
43101 #if TARGET_MACHO
43102 darwin_file_start ();
43103 #endif
43104 if (X86_FILE_START_VERSION_DIRECTIVE)
43105 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
43106 if (X86_FILE_START_FLTUSED)
43107 fputs ("\t.global\t__fltused\n", asm_out_file);
43108 if (ix86_asm_dialect == ASM_INTEL)
43109 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
43113 x86_field_alignment (tree field, int computed)
43115 machine_mode mode;
43116 tree type = TREE_TYPE (field);
43118 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
43119 return computed;
43120 mode = TYPE_MODE (strip_array_types (type));
43121 if (mode == DFmode || mode == DCmode
43122 || GET_MODE_CLASS (mode) == MODE_INT
43123 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
43124 return MIN (32, computed);
43125 return computed;
43128 /* Print call to TARGET to FILE. */
43130 static void
43131 x86_print_call_or_nop (FILE *file, const char *target)
43133 if (flag_nop_mcount)
43134 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
43135 else
43136 fprintf (file, "1:\tcall\t%s\n", target);
43139 /* Output assembler code to FILE to increment profiler label # LABELNO
43140 for profiling a function entry. */
43141 void
43142 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
43144 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
43145 : MCOUNT_NAME);
43146 if (TARGET_64BIT)
43148 #ifndef NO_PROFILE_COUNTERS
43149 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
43150 #endif
43152 if (!TARGET_PECOFF && flag_pic)
43153 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
43154 else
43155 x86_print_call_or_nop (file, mcount_name);
43157 else if (flag_pic)
43159 #ifndef NO_PROFILE_COUNTERS
43160 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43161 LPREFIX, labelno);
43162 #endif
43163 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43165 else
43167 #ifndef NO_PROFILE_COUNTERS
43168 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43169 LPREFIX, labelno);
43170 #endif
43171 x86_print_call_or_nop (file, mcount_name);
43174 if (flag_record_mcount)
43176 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43177 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43178 fprintf (file, "\t.previous\n");
43182 /* We don't have exact information about the insn sizes, but we may assume
43183 quite safely that we are informed about all 1 byte insns and memory
43184 address sizes. This is enough to eliminate unnecessary padding in
43185 99% of cases. */
43187 static int
43188 min_insn_size (rtx_insn *insn)
43190 int l = 0, len;
43192 if (!INSN_P (insn) || !active_insn_p (insn))
43193 return 0;
43195 /* Discard alignments we've emit and jump instructions. */
43196 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43197 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43198 return 0;
43200   /* Important case - calls are always 5 bytes.
43201      It is common to have many calls in a row.  */
43202 if (CALL_P (insn)
43203 && symbolic_reference_mentioned_p (PATTERN (insn))
43204 && !SIBLING_CALL_P (insn))
43205 return 5;
43206 len = get_attr_length (insn);
43207 if (len <= 1)
43208 return 1;
43210 /* For normal instructions we rely on get_attr_length being exact,
43211 with a few exceptions. */
43212 if (!JUMP_P (insn))
43214 enum attr_type type = get_attr_type (insn);
43216 switch (type)
43218 case TYPE_MULTI:
43219 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43220 || asm_noperands (PATTERN (insn)) >= 0)
43221 return 0;
43222 break;
43223 case TYPE_OTHER:
43224 case TYPE_FCMP:
43225 break;
43226 default:
43227 /* Otherwise trust get_attr_length. */
43228 return len;
43231 l = get_attr_length_address (insn);
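  /* Symbolic operands take a 4-byte displacement or immediate, which the
     address-length attribute alone may not reflect, so assume at least
     4 bytes for them.  */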
43232 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43233 l = 4;
43235 if (l)
43236 return 1+l;
43237 else
43238 return 2;
43241 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43243 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
43244 window. */
43246 static void
43247 ix86_avoid_jump_mispredicts (void)
43249 rtx_insn *insn, *start = get_insns ();
43250 int nbytes = 0, njumps = 0;
43251 bool isjump = false;
43253 /* Look for all minimal intervals of instructions containing 4 jumps.
43254 The intervals are bounded by START and INSN. NBYTES is the total
43255 size of instructions in the interval including INSN and not including
43256      START.  When NBYTES is smaller than 16 bytes, it is possible
43257      that START and INSN end up in the same 16-byte window.
43259      The smallest offset at which INSN can start is when START ends at
43260      offset 0; the offset of INSN is then NBYTES - sizeof (INSN).  We add
43261      a p2align to the 16-byte window with max-skip 15 - NBYTES + sizeof (INSN).
43263 Don't consider asm goto as jump, while it can contain a jump, it doesn't
43264 have to, control transfer to label(s) can be performed through other
43265 means, and also we estimate minimum length of all asm stmts as 0. */
43266 for (insn = start; insn; insn = NEXT_INSN (insn))
43268 int min_size;
43270 if (LABEL_P (insn))
43272 int align = label_to_alignment (insn);
43273 int max_skip = label_to_max_skip (insn);
43275 if (max_skip > 15)
43276 max_skip = 15;
43277 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43278 already in the current 16 byte page, because otherwise
43279 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43280 bytes to reach 16 byte boundary. */
43281 if (align <= 0
43282 || (align <= 3 && max_skip != (1 << align) - 1))
43283 max_skip = 0;
43284 if (dump_file)
43285 fprintf (dump_file, "Label %i with max_skip %i\n",
43286 INSN_UID (insn), max_skip);
43287 if (max_skip)
43289 while (nbytes + max_skip >= 16)
43291 start = NEXT_INSN (start);
43292 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43293 || CALL_P (start))
43294 njumps--, isjump = true;
43295 else
43296 isjump = false;
43297 nbytes -= min_insn_size (start);
43300 continue;
43303 min_size = min_insn_size (insn);
43304 nbytes += min_size;
43305 if (dump_file)
43306 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43307 INSN_UID (insn), min_size);
43308 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43309 || CALL_P (insn))
43310 njumps++;
43311 else
43312 continue;
43314 while (njumps > 3)
43316 start = NEXT_INSN (start);
43317 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43318 || CALL_P (start))
43319 njumps--, isjump = true;
43320 else
43321 isjump = false;
43322 nbytes -= min_insn_size (start);
43324 gcc_assert (njumps >= 0);
43325 if (dump_file)
43326 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43327 INSN_UID (start), INSN_UID (insn), nbytes);
43329 if (njumps == 3 && isjump && nbytes < 16)
43331 int padsize = 15 - nbytes + min_insn_size (insn);
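	  /* For instance, with NBYTES == 12 and a 2-byte INSN this requests
	     up to 15 - 12 + 2 == 5 bytes of padding before INSN, keeping the
	     fourth jump out of the 16-byte window holding the other three.  */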
43333 if (dump_file)
43334 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43335 INSN_UID (insn), padsize);
43336 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43340 #endif
43342 /* AMD Athlon works faster
43343    when RET is not the destination of a conditional jump or directly
43344    preceded by another jump instruction.  We avoid the penalty by
43345    inserting a NOP just before the RET instruction in such cases.  */
43346 static void
43347 ix86_pad_returns (void)
43349 edge e;
43350 edge_iterator ei;
43352 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43354 basic_block bb = e->src;
43355 rtx_insn *ret = BB_END (bb);
43356 rtx_insn *prev;
43357 bool replace = false;
43359 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43360 || optimize_bb_for_size_p (bb))
43361 continue;
43362 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43363 if (active_insn_p (prev) || LABEL_P (prev))
43364 break;
43365 if (prev && LABEL_P (prev))
43367 edge e;
43368 edge_iterator ei;
43370 FOR_EACH_EDGE (e, ei, bb->preds)
43371 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43372 && !(e->flags & EDGE_FALLTHRU))
43374 replace = true;
43375 break;
43378 if (!replace)
43380 prev = prev_active_insn (ret);
43381 if (prev
43382 && ((JUMP_P (prev) && any_condjump_p (prev))
43383 || CALL_P (prev)))
43384 replace = true;
43385 	  /* Empty functions get a branch mispredict even when
43386 	     the jump destination is not visible to us.  */
43387 if (!prev && !optimize_function_for_size_p (cfun))
43388 replace = true;
43390 if (replace)
43392 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43393 delete_insn (ret);
43398 /* Count the minimum number of instructions in BB. Return 4 if the
43399 number of instructions >= 4. */
43401 static int
43402 ix86_count_insn_bb (basic_block bb)
43404 rtx_insn *insn;
43405 int insn_count = 0;
43407 /* Count number of instructions in this block. Return 4 if the number
43408 of instructions >= 4. */
43409 FOR_BB_INSNS (bb, insn)
43411       /* This only happens in exit blocks.  */
43412 if (JUMP_P (insn)
43413 && ANY_RETURN_P (PATTERN (insn)))
43414 break;
43416 if (NONDEBUG_INSN_P (insn)
43417 && GET_CODE (PATTERN (insn)) != USE
43418 && GET_CODE (PATTERN (insn)) != CLOBBER)
43420 insn_count++;
43421 if (insn_count >= 4)
43422 return insn_count;
43426 return insn_count;
43430 /* Count the minimum number of instructions in code path in BB.
43431 Return 4 if the number of instructions >= 4. */
43433 static int
43434 ix86_count_insn (basic_block bb)
43436 edge e;
43437 edge_iterator ei;
43438 int min_prev_count;
43440 /* Only bother counting instructions along paths with no
43441 more than 2 basic blocks between entry and exit. Given
43442 that BB has an edge to exit, determine if a predecessor
43443 of BB has an edge from entry. If so, compute the number
43444 of instructions in the predecessor block. If there
43445 happen to be multiple such blocks, compute the minimum. */
43446 min_prev_count = 4;
43447 FOR_EACH_EDGE (e, ei, bb->preds)
43449 edge prev_e;
43450 edge_iterator prev_ei;
43452 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43454 min_prev_count = 0;
43455 break;
43457 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43459 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43461 int count = ix86_count_insn_bb (e->src);
43462 if (count < min_prev_count)
43463 min_prev_count = count;
43464 break;
43469 if (min_prev_count < 4)
43470 min_prev_count += ix86_count_insn_bb (bb);
43472 return min_prev_count;
43475 /* Pad short function to 4 instructions. */
43477 static void
43478 ix86_pad_short_function (void)
43480 edge e;
43481 edge_iterator ei;
43483 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43485 rtx_insn *ret = BB_END (e->src);
43486 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43488 int insn_count = ix86_count_insn (e->src);
43490 /* Pad short function. */
43491 if (insn_count < 4)
43493 rtx_insn *insn = ret;
43495 /* Find epilogue. */
43496 while (insn
43497 && (!NOTE_P (insn)
43498 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43499 insn = PREV_INSN (insn);
43501 if (!insn)
43502 insn = ret;
43504 /* Two NOPs count as one instruction. */
43505 insn_count = 2 * (4 - insn_count);
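	      /* E.g. a function with one real insn emits 2 * (4 - 1) == 6
		 NOPs, which count as three more instructions and bring the
		 total up to the 4-insn minimum.  */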
43506 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43512 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43513 the epilogue, the Windows system unwinder will apply epilogue logic and
43514 produce incorrect offsets. This can be avoided by adding a nop between
43515 the last insn that can throw and the first insn of the epilogue. */
43517 static void
43518 ix86_seh_fixup_eh_fallthru (void)
43520 edge e;
43521 edge_iterator ei;
43523 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43525 rtx_insn *insn, *next;
43527 /* Find the beginning of the epilogue. */
43528 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43529 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43530 break;
43531 if (insn == NULL)
43532 continue;
43534 /* We only care about preceding insns that can throw. */
43535 insn = prev_active_insn (insn);
43536 if (insn == NULL || !can_throw_internal (insn))
43537 continue;
43539 /* Do not separate calls from their debug information. */
43540 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43541 if (NOTE_P (next)
43542 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43543 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43544 insn = next;
43545 else
43546 break;
43548 emit_insn_after (gen_nops (const1_rtx), insn);
43552 /* Implement machine-specific optimizations.  We implement padding of returns
43553    for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */
43554 static void
43555 ix86_reorg (void)
43557 /* We are freeing block_for_insn in the toplev to keep compatibility
43558 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43559 compute_bb_for_insn ();
43561 if (TARGET_SEH && current_function_has_exception_handlers ())
43562 ix86_seh_fixup_eh_fallthru ();
43564 if (optimize && optimize_function_for_speed_p (cfun))
43566 if (TARGET_PAD_SHORT_FUNCTION)
43567 ix86_pad_short_function ();
43568 else if (TARGET_PAD_RETURNS)
43569 ix86_pad_returns ();
43570 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43571 if (TARGET_FOUR_JUMP_LIMIT)
43572 ix86_avoid_jump_mispredicts ();
43573 #endif
43577 /* Return nonzero when a QImode register that must be represented via a REX
43578    prefix is used.  */
43579 bool
43580 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43582 int i;
43583 extract_insn_cached (insn);
43584 for (i = 0; i < recog_data.n_operands; i++)
43585 if (GENERAL_REG_P (recog_data.operand[i])
43586 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43587 return true;
43588 return false;
43591 /* Return true when INSN mentions a register that must be encoded using a
43592    REX prefix.  */
43593 bool
43594 x86_extended_reg_mentioned_p (rtx insn)
43596 subrtx_iterator::array_type array;
43597 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43599 const_rtx x = *iter;
43600 if (REG_P (x)
43601 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43602 return true;
43604 return false;
43607 /* If profitable, negate (without causing overflow) integer constant
43608 of mode MODE at location LOC. Return true in this case. */
43609 bool
43610 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43612 HOST_WIDE_INT val;
43614 if (!CONST_INT_P (*loc))
43615 return false;
43617 switch (mode)
43619 case DImode:
43620 /* DImode x86_64 constants must fit in 32 bits. */
43621 gcc_assert (x86_64_immediate_operand (*loc, mode));
43623 mode = SImode;
43624 break;
43626 case SImode:
43627 case HImode:
43628 case QImode:
43629 break;
43631 default:
43632 gcc_unreachable ();
43635 /* Avoid overflows. */
43636 if (mode_signbit_p (mode, *loc))
43637 return false;
43639 val = INTVAL (*loc);
43641   /* Make things pretty: use `subl $4,%eax' rather than `addl $-4,%eax'.
43642      Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
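  /* For instance, x + (-4) becomes x - 4; but x + (-128) is left alone,
     since -128 fits a sign-extended 8-bit immediate while +128 would need
     a full 32-bit one, and x + 128 becomes x - (-128) for the same reason.  */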
43643 if ((val < 0 && val != -128)
43644 || val == 128)
43646 *loc = GEN_INT (-val);
43647 return true;
43650 return false;
43653 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43654 optabs would emit if we didn't have TFmode patterns. */
43656 void
43657 x86_emit_floatuns (rtx operands[2])
43659 rtx_code_label *neglab, *donelab;
43660 rtx i0, i1, f0, in, out;
43661 machine_mode mode, inmode;
43663 inmode = GET_MODE (operands[1]);
43664 gcc_assert (inmode == SImode || inmode == DImode);
43666 out = operands[0];
43667 in = force_reg (inmode, operands[1]);
43668 mode = GET_MODE (out);
43669 neglab = gen_label_rtx ();
43670 donelab = gen_label_rtx ();
43671 f0 = gen_reg_rtx (mode);
43673 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43675 expand_float (out, in, 0);
43677 emit_jump_insn (gen_jump (donelab));
43678 emit_barrier ();
43680 emit_label (neglab);
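  /* Here the input has its sign bit set when viewed as signed, so halve it
     while folding the low bit back in (round-to-odd, so the final rounding
     is unaffected), convert the halved value, then double the result.  */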
43682 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43683 1, OPTAB_DIRECT);
43684 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43685 1, OPTAB_DIRECT);
43686 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43688 expand_float (f0, i0, 0);
43690 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
43692 emit_label (donelab);
43695 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43696 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43697 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43698 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43700 /* Get a vector mode of the same size as the original but with elements
43701 twice as wide. This is only guaranteed to apply to integral vectors. */
43703 static inline machine_mode
43704 get_mode_wider_vector (machine_mode o)
43706 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43707 machine_mode n = GET_MODE_WIDER_MODE (o);
43708 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43709 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43710 return n;
43713 /* A subroutine of ix86_expand_vector_init_duplicate.  Try to
43714    fill TARGET with VAL via vec_duplicate.  */
43716 static bool
43717 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43719 bool ok;
43720 rtx_insn *insn;
43721 rtx dup;
43723 /* First attempt to recognize VAL as-is. */
43724 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43725 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
43726 if (recog_memoized (insn) < 0)
43728 rtx_insn *seq;
43729 /* If that fails, force VAL into a register. */
43731 start_sequence ();
43732 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43733 seq = get_insns ();
43734 end_sequence ();
43735 if (seq)
43736 emit_insn_before (seq, insn);
43738 ok = recog_memoized (insn) >= 0;
43739 gcc_assert (ok);
43741 return true;
43744 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43745 with all elements equal to VAR. Return true if successful. */
43747 static bool
43748 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43749 rtx target, rtx val)
43751 bool ok;
43753 switch (mode)
43755 case V2SImode:
43756 case V2SFmode:
43757 if (!mmx_ok)
43758 return false;
43759 /* FALLTHRU */
43761 case V4DFmode:
43762 case V4DImode:
43763 case V8SFmode:
43764 case V8SImode:
43765 case V2DFmode:
43766 case V2DImode:
43767 case V4SFmode:
43768 case V4SImode:
43769 case V16SImode:
43770 case V8DImode:
43771 case V16SFmode:
43772 case V8DFmode:
43773 return ix86_vector_duplicate_value (mode, target, val);
43775 case V4HImode:
43776 if (!mmx_ok)
43777 return false;
43778 if (TARGET_SSE || TARGET_3DNOW_A)
43780 rtx x;
43782 val = gen_lowpart (SImode, val);
43783 x = gen_rtx_TRUNCATE (HImode, val);
43784 x = gen_rtx_VEC_DUPLICATE (mode, x);
43785 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43786 return true;
43788 goto widen;
43790 case V8QImode:
43791 if (!mmx_ok)
43792 return false;
43793 goto widen;
43795 case V8HImode:
43796 if (TARGET_AVX2)
43797 return ix86_vector_duplicate_value (mode, target, val);
43799 if (TARGET_SSE2)
43801 struct expand_vec_perm_d dperm;
43802 rtx tmp1, tmp2;
43804 permute:
43805 memset (&dperm, 0, sizeof (dperm));
43806 dperm.target = target;
43807 dperm.vmode = mode;
43808 dperm.nelt = GET_MODE_NUNITS (mode);
43809 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
43810 dperm.one_operand_p = true;
43812 /* Extend to SImode using a paradoxical SUBREG. */
43813 tmp1 = gen_reg_rtx (SImode);
43814 emit_move_insn (tmp1, gen_lowpart (SImode, val));
43816 /* Insert the SImode value as low element of a V4SImode vector. */
43817 tmp2 = gen_reg_rtx (V4SImode);
43818 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
43819 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
43821 ok = (expand_vec_perm_1 (&dperm)
43822 || expand_vec_perm_broadcast_1 (&dperm));
43823 gcc_assert (ok);
43824 return ok;
43826 goto widen;
43828 case V16QImode:
43829 if (TARGET_AVX2)
43830 return ix86_vector_duplicate_value (mode, target, val);
43832 if (TARGET_SSE2)
43833 goto permute;
43834 goto widen;
43836 widen:
43837 /* Replicate the value once into the next wider mode and recurse. */
43839 machine_mode smode, wsmode, wvmode;
43840 rtx x;
43842 smode = GET_MODE_INNER (mode);
43843 wvmode = get_mode_wider_vector (mode);
43844 wsmode = GET_MODE_INNER (wvmode);
43846 val = convert_modes (wsmode, smode, val, true);
43847 x = expand_simple_binop (wsmode, ASHIFT, val,
43848 GEN_INT (GET_MODE_BITSIZE (smode)),
43849 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43850 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
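      /* VAL now contains two copies of the original element packed into one
	 wider element: e.g. a QImode 0xab becomes the HImode value 0xabab,
	 ready to be broadcast in the wider vector mode.  */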
43852 x = gen_reg_rtx (wvmode);
43853 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
43854 gcc_assert (ok);
43855 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
43856 return ok;
43859 case V16HImode:
43860 case V32QImode:
43861 if (TARGET_AVX2)
43862 return ix86_vector_duplicate_value (mode, target, val);
43863 else
43865 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
43866 rtx x = gen_reg_rtx (hvmode);
43868 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43869 gcc_assert (ok);
43871 x = gen_rtx_VEC_CONCAT (mode, x, x);
43872 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43874 return true;
43876 case V64QImode:
43877 case V32HImode:
43878 if (TARGET_AVX512BW)
43879 return ix86_vector_duplicate_value (mode, target, val);
43880 else
43882 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
43883 rtx x = gen_reg_rtx (hvmode);
43885 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43886 gcc_assert (ok);
43888 x = gen_rtx_VEC_CONCAT (mode, x, x);
43889 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43891 return true;
43893 default:
43894 return false;
43898 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43899 whose ONE_VAR element is VAR, and other elements are zero. Return true
43900 if successful. */
43902 static bool
43903 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
43904 rtx target, rtx var, int one_var)
43906 machine_mode vsimode;
43907 rtx new_target;
43908 rtx x, tmp;
43909 bool use_vector_set = false;
43911 switch (mode)
43913 case V2DImode:
43914 /* For SSE4.1, we normally use vector set. But if the second
43915 element is zero and inter-unit moves are OK, we use movq
43916 instead. */
43917 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
43918 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
43919 && one_var == 0));
43920 break;
43921 case V16QImode:
43922 case V4SImode:
43923 case V4SFmode:
43924 use_vector_set = TARGET_SSE4_1;
43925 break;
43926 case V8HImode:
43927 use_vector_set = TARGET_SSE2;
43928 break;
43929 case V4HImode:
43930 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
43931 break;
43932 case V32QImode:
43933 case V16HImode:
43934 case V8SImode:
43935 case V8SFmode:
43936 case V4DFmode:
43937 use_vector_set = TARGET_AVX;
43938 break;
43939 case V4DImode:
43940 /* Use ix86_expand_vector_set in 64bit mode only. */
43941 use_vector_set = TARGET_AVX && TARGET_64BIT;
43942 break;
43943 default:
43944 break;
43947 if (use_vector_set)
43949 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
43950 var = force_reg (GET_MODE_INNER (mode), var);
43951 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43952 return true;
43955 switch (mode)
43957 case V2SFmode:
43958 case V2SImode:
43959 if (!mmx_ok)
43960 return false;
43961 /* FALLTHRU */
43963 case V2DFmode:
43964 case V2DImode:
43965 if (one_var != 0)
43966 return false;
43967 var = force_reg (GET_MODE_INNER (mode), var);
43968 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
43969 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43970 return true;
43972 case V4SFmode:
43973 case V4SImode:
43974 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
43975 new_target = gen_reg_rtx (mode);
43976 else
43977 new_target = target;
43978 var = force_reg (GET_MODE_INNER (mode), var);
43979 x = gen_rtx_VEC_DUPLICATE (mode, var);
43980 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
43981 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
43982 if (one_var != 0)
43984 /* We need to shuffle the value to the correct position, so
43985 create a new pseudo to store the intermediate result. */
43987 /* With SSE2, we can use the integer shuffle insns. */
43988 if (mode != V4SFmode && TARGET_SSE2)
43990 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
43991 const1_rtx,
43992 GEN_INT (one_var == 1 ? 0 : 1),
43993 GEN_INT (one_var == 2 ? 0 : 1),
43994 GEN_INT (one_var == 3 ? 0 : 1)));
43995 if (target != new_target)
43996 emit_move_insn (target, new_target);
43997 return true;
44000 /* Otherwise convert the intermediate result to V4SFmode and
44001 use the SSE1 shuffle instructions. */
44002 if (mode != V4SFmode)
44004 tmp = gen_reg_rtx (V4SFmode);
44005 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
44007 else
44008 tmp = new_target;
44010 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
44011 const1_rtx,
44012 GEN_INT (one_var == 1 ? 0 : 1),
44013 GEN_INT (one_var == 2 ? 0+4 : 1+4),
44014 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
44016 if (mode != V4SFmode)
44017 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
44018 else if (tmp != target)
44019 emit_move_insn (target, tmp);
44021 else if (target != new_target)
44022 emit_move_insn (target, new_target);
44023 return true;
44025 case V8HImode:
44026 case V16QImode:
44027 vsimode = V4SImode;
44028 goto widen;
44029 case V4HImode:
44030 case V8QImode:
44031 if (!mmx_ok)
44032 return false;
44033 vsimode = V2SImode;
44034 goto widen;
44035 widen:
44036 if (one_var != 0)
44037 return false;
44039 /* Zero extend the variable element to SImode and recurse. */
44040 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
44042 x = gen_reg_rtx (vsimode);
44043 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
44044 var, one_var))
44045 gcc_unreachable ();
44047 emit_move_insn (target, gen_lowpart (mode, x));
44048 return true;
44050 default:
44051 return false;
44055 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
44056 consisting of the values in VALS. It is known that all elements
44057 except ONE_VAR are constants. Return true if successful. */
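/* For example, for { 1, 2, x, 4 } the usual strategy below is to load
   { 1, 2, 0, 4 } from the constant pool and then overwrite element
   ONE_VAR with the variable value via ix86_expand_vector_set; QImode
   vectors are first widened to HImode elements.  A sketch only.  */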
44059 static bool
44060 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
44061 rtx target, rtx vals, int one_var)
44063 rtx var = XVECEXP (vals, 0, one_var);
44064 machine_mode wmode;
44065 rtx const_vec, x;
44067 const_vec = copy_rtx (vals);
44068 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
44069 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
44071 switch (mode)
44073 case V2DFmode:
44074 case V2DImode:
44075 case V2SFmode:
44076 case V2SImode:
44077 /* For the two element vectors, it's just as easy to use
44078 the general case. */
44079 return false;
44081 case V4DImode:
44082 /* Use ix86_expand_vector_set in 64bit mode only. */
44083 if (!TARGET_64BIT)
44084 return false;
44085 case V4DFmode:
44086 case V8SFmode:
44087 case V8SImode:
44088 case V16HImode:
44089 case V32QImode:
44090 case V4SFmode:
44091 case V4SImode:
44092 case V8HImode:
44093 case V4HImode:
44094 break;
44096 case V16QImode:
44097 if (TARGET_SSE4_1)
44098 break;
44099 wmode = V8HImode;
44100 goto widen;
44101 case V8QImode:
44102 wmode = V4HImode;
44103 goto widen;
44104 widen:
44105 /* There's no way to set one QImode entry easily. Combine
44106 the variable value with its adjacent constant value, and
44107 promote to an HImode set. */
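/* As a rough scalar sketch, with VAR the variable byte and C the
   adjacent constant byte, the code below computes
     hi = (one_var & 1) ? (var << 8) | (c & 0xff)
                        : (c << 8) | var;
   and then stores the combined HImode value at element one_var / 2.  */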
44108 x = XVECEXP (vals, 0, one_var ^ 1);
44109 if (one_var & 1)
44111 var = convert_modes (HImode, QImode, var, true);
44112 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
44113 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44114 x = GEN_INT (INTVAL (x) & 0xff);
44116 else
44118 var = convert_modes (HImode, QImode, var, true);
44119 x = gen_int_mode (INTVAL (x) << 8, HImode);
44121 if (x != const0_rtx)
44122 var = expand_simple_binop (HImode, IOR, var, x, var,
44123 1, OPTAB_LIB_WIDEN);
44125 x = gen_reg_rtx (wmode);
44126 emit_move_insn (x, gen_lowpart (wmode, const_vec));
44127 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
44129 emit_move_insn (target, gen_lowpart (mode, x));
44130 return true;
44132 default:
44133 return false;
44136 emit_move_insn (target, const_vec);
44137 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44138 return true;
44141 /* A subroutine of ix86_expand_vector_init_general. Use vector
44142 concatenate to handle the most general case: all values variable,
44143 and none identical. */
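/* For example, with N == 8 and MODE == V8SFmode the inputs are first
   combined pairwise into four V2SFmode registers, those into two
   V4SFmode halves, and finally into the full V8SFmode result:
     {a,b} {c,d} {e,f} {g,h} -> {a,b,c,d} {e,f,g,h} -> {a,...,h}
   This is only a sketch of the recursion below.  */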
44145 static void
44146 ix86_expand_vector_init_concat (machine_mode mode,
44147 rtx target, rtx *ops, int n)
44149 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
44150 rtx first[16], second[8], third[4];
44151 rtvec v;
44152 int i, j;
44154 switch (n)
44156 case 2:
44157 switch (mode)
44159 case V16SImode:
44160 cmode = V8SImode;
44161 break;
44162 case V16SFmode:
44163 cmode = V8SFmode;
44164 break;
44165 case V8DImode:
44166 cmode = V4DImode;
44167 break;
44168 case V8DFmode:
44169 cmode = V4DFmode;
44170 break;
44171 case V8SImode:
44172 cmode = V4SImode;
44173 break;
44174 case V8SFmode:
44175 cmode = V4SFmode;
44176 break;
44177 case V4DImode:
44178 cmode = V2DImode;
44179 break;
44180 case V4DFmode:
44181 cmode = V2DFmode;
44182 break;
44183 case V4SImode:
44184 cmode = V2SImode;
44185 break;
44186 case V4SFmode:
44187 cmode = V2SFmode;
44188 break;
44189 case V2DImode:
44190 cmode = DImode;
44191 break;
44192 case V2SImode:
44193 cmode = SImode;
44194 break;
44195 case V2DFmode:
44196 cmode = DFmode;
44197 break;
44198 case V2SFmode:
44199 cmode = SFmode;
44200 break;
44201 default:
44202 gcc_unreachable ();
44205 if (!register_operand (ops[1], cmode))
44206 ops[1] = force_reg (cmode, ops[1]);
44207 if (!register_operand (ops[0], cmode))
44208 ops[0] = force_reg (cmode, ops[0]);
44209 emit_insn (gen_rtx_SET (VOIDmode, target,
44210 gen_rtx_VEC_CONCAT (mode, ops[0],
44211 ops[1])));
44212 break;
44214 case 4:
44215 switch (mode)
44217 case V4DImode:
44218 cmode = V2DImode;
44219 break;
44220 case V4DFmode:
44221 cmode = V2DFmode;
44222 break;
44223 case V4SImode:
44224 cmode = V2SImode;
44225 break;
44226 case V4SFmode:
44227 cmode = V2SFmode;
44228 break;
44229 default:
44230 gcc_unreachable ();
44232 goto half;
44234 case 8:
44235 switch (mode)
44237 case V8DImode:
44238 cmode = V2DImode;
44239 hmode = V4DImode;
44240 break;
44241 case V8DFmode:
44242 cmode = V2DFmode;
44243 hmode = V4DFmode;
44244 break;
44245 case V8SImode:
44246 cmode = V2SImode;
44247 hmode = V4SImode;
44248 break;
44249 case V8SFmode:
44250 cmode = V2SFmode;
44251 hmode = V4SFmode;
44252 break;
44253 default:
44254 gcc_unreachable ();
44256 goto half;
44258 case 16:
44259 switch (mode)
44261 case V16SImode:
44262 cmode = V2SImode;
44263 hmode = V4SImode;
44264 gmode = V8SImode;
44265 break;
44266 case V16SFmode:
44267 cmode = V2SFmode;
44268 hmode = V4SFmode;
44269 gmode = V8SFmode;
44270 break;
44271 default:
44272 gcc_unreachable ();
44274 goto half;
44276 half:
44277 /* FIXME: We process inputs backward to help RA. PR 36222. */
44278 i = n - 1;
44279 j = (n >> 1) - 1;
44280 for (; i > 0; i -= 2, j--)
44282 first[j] = gen_reg_rtx (cmode);
44283 v = gen_rtvec (2, ops[i - 1], ops[i]);
44284 ix86_expand_vector_init (false, first[j],
44285 gen_rtx_PARALLEL (cmode, v));
44288 n >>= 1;
44289 if (n > 4)
44291 gcc_assert (hmode != VOIDmode);
44292 gcc_assert (gmode != VOIDmode);
44293 for (i = j = 0; i < n; i += 2, j++)
44295 second[j] = gen_reg_rtx (hmode);
44296 ix86_expand_vector_init_concat (hmode, second [j],
44297 &first [i], 2);
44299 n >>= 1;
44300 for (i = j = 0; i < n; i += 2, j++)
44302 third[j] = gen_reg_rtx (gmode);
44303 ix86_expand_vector_init_concat (gmode, third[j],
44304 &second[i], 2);
44306 n >>= 1;
44307 ix86_expand_vector_init_concat (mode, target, third, n);
44309 else if (n > 2)
44311 gcc_assert (hmode != VOIDmode);
44312 for (i = j = 0; i < n; i += 2, j++)
44314 second[j] = gen_reg_rtx (hmode);
44315 ix86_expand_vector_init_concat (hmode, second [j],
44316 &first [i], 2);
44318 n >>= 1;
44319 ix86_expand_vector_init_concat (mode, target, second, n);
44321 else
44322 ix86_expand_vector_init_concat (mode, target, first, n);
44323 break;
44325 default:
44326 gcc_unreachable ();
44330 /* A subroutine of ix86_expand_vector_init_general. Use vector
44331 interleave to handle the most general case: all values variable,
44332 and none identical. */
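/* Roughly, for V16QImode each pair of input bytes is first placed in
   the two low byte positions of its own vector, and the pieces are then
   combined with successive "interleave low" steps, first as V8HImode,
   then V4SImode, then V2DImode:
     {0,1} {2,3} ... {14,15} -> {0,1,2,3} ... -> {0,...,7} {8,...,15}
     -> {0,...,15}
   A sketch only; the numbers denote positions of the original elements.  */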
44334 static void
44335 ix86_expand_vector_init_interleave (machine_mode mode,
44336 rtx target, rtx *ops, int n)
44338 machine_mode first_imode, second_imode, third_imode, inner_mode;
44339 int i, j;
44340 rtx op0, op1;
44341 rtx (*gen_load_even) (rtx, rtx, rtx);
44342 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44343 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44345 switch (mode)
44347 case V8HImode:
44348 gen_load_even = gen_vec_setv8hi;
44349 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44350 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44351 inner_mode = HImode;
44352 first_imode = V4SImode;
44353 second_imode = V2DImode;
44354 third_imode = VOIDmode;
44355 break;
44356 case V16QImode:
44357 gen_load_even = gen_vec_setv16qi;
44358 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44359 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44360 inner_mode = QImode;
44361 first_imode = V8HImode;
44362 second_imode = V4SImode;
44363 third_imode = V2DImode;
44364 break;
44365 default:
44366 gcc_unreachable ();
44369 for (i = 0; i < n; i++)
44371 /* Extend the odd element to SImode using a paradoxical SUBREG. */
44372 op0 = gen_reg_rtx (SImode);
44373 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44375 /* Insert the SImode value as low element of V4SImode vector. */
44376 op1 = gen_reg_rtx (V4SImode);
44377 op0 = gen_rtx_VEC_MERGE (V4SImode,
44378 gen_rtx_VEC_DUPLICATE (V4SImode,
44379 op0),
44380 CONST0_RTX (V4SImode),
44381 const1_rtx);
44382 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
44384 /* Cast the V4SImode vector back to a vector in the original mode. */
44385 op0 = gen_reg_rtx (mode);
44386 emit_move_insn (op0, gen_lowpart (mode, op1));
44388 /* Load even elements into the second position. */
44389 emit_insn (gen_load_even (op0,
44390 force_reg (inner_mode,
44391 ops [i + i + 1]),
44392 const1_rtx));
44394 /* Cast vector to FIRST_IMODE vector. */
44395 ops[i] = gen_reg_rtx (first_imode);
44396 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44399 /* Interleave low FIRST_IMODE vectors. */
44400 for (i = j = 0; i < n; i += 2, j++)
44402 op0 = gen_reg_rtx (first_imode);
44403 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44405 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44406 ops[j] = gen_reg_rtx (second_imode);
44407 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44410 /* Interleave low SECOND_IMODE vectors. */
44411 switch (second_imode)
44413 case V4SImode:
44414 for (i = j = 0; i < n / 2; i += 2, j++)
44416 op0 = gen_reg_rtx (second_imode);
44417 emit_insn (gen_interleave_second_low (op0, ops[i],
44418 ops[i + 1]));
44420 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44421 vector. */
44422 ops[j] = gen_reg_rtx (third_imode);
44423 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44425 second_imode = V2DImode;
44426 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44427 /* FALLTHRU */
44429 case V2DImode:
44430 op0 = gen_reg_rtx (second_imode);
44431 emit_insn (gen_interleave_second_low (op0, ops[0],
44432 ops[1]));
44434 /* Cast the SECOND_IMODE vector back to a vector in the original
44435 mode. */
44436 emit_insn (gen_rtx_SET (VOIDmode, target,
44437 gen_lowpart (mode, op0)));
44438 break;
44440 default:
44441 gcc_unreachable ();
44445 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44446 all values variable, and none identical. */
44448 static void
44449 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44450 rtx target, rtx vals)
44452 rtx ops[64], op0, op1, op2, op3, op4, op5;
44453 machine_mode half_mode = VOIDmode;
44454 machine_mode quarter_mode = VOIDmode;
44455 int n, i;
44457 switch (mode)
44459 case V2SFmode:
44460 case V2SImode:
44461 if (!mmx_ok && !TARGET_SSE)
44462 break;
44463 /* FALLTHRU */
44465 case V16SImode:
44466 case V16SFmode:
44467 case V8DFmode:
44468 case V8DImode:
44469 case V8SFmode:
44470 case V8SImode:
44471 case V4DFmode:
44472 case V4DImode:
44473 case V4SFmode:
44474 case V4SImode:
44475 case V2DFmode:
44476 case V2DImode:
44477 n = GET_MODE_NUNITS (mode);
44478 for (i = 0; i < n; i++)
44479 ops[i] = XVECEXP (vals, 0, i);
44480 ix86_expand_vector_init_concat (mode, target, ops, n);
44481 return;
44483 case V32QImode:
44484 half_mode = V16QImode;
44485 goto half;
44487 case V16HImode:
44488 half_mode = V8HImode;
44489 goto half;
44491 half:
44492 n = GET_MODE_NUNITS (mode);
44493 for (i = 0; i < n; i++)
44494 ops[i] = XVECEXP (vals, 0, i);
44495 op0 = gen_reg_rtx (half_mode);
44496 op1 = gen_reg_rtx (half_mode);
44497 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44498 n >> 2);
44499 ix86_expand_vector_init_interleave (half_mode, op1,
44500 &ops [n >> 1], n >> 2);
44501 emit_insn (gen_rtx_SET (VOIDmode, target,
44502 gen_rtx_VEC_CONCAT (mode, op0, op1)));
44503 return;
44505 case V64QImode:
44506 quarter_mode = V16QImode;
44507 half_mode = V32QImode;
44508 goto quarter;
44510 case V32HImode:
44511 quarter_mode = V8HImode;
44512 half_mode = V16HImode;
44513 goto quarter;
44515 quarter:
44516 n = GET_MODE_NUNITS (mode);
44517 for (i = 0; i < n; i++)
44518 ops[i] = XVECEXP (vals, 0, i);
44519 op0 = gen_reg_rtx (quarter_mode);
44520 op1 = gen_reg_rtx (quarter_mode);
44521 op2 = gen_reg_rtx (quarter_mode);
44522 op3 = gen_reg_rtx (quarter_mode);
44523 op4 = gen_reg_rtx (half_mode);
44524 op5 = gen_reg_rtx (half_mode);
44525 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44526 n >> 3);
44527 ix86_expand_vector_init_interleave (quarter_mode, op1,
44528 &ops [n >> 2], n >> 3);
44529 ix86_expand_vector_init_interleave (quarter_mode, op2,
44530 &ops [n >> 1], n >> 3);
44531 ix86_expand_vector_init_interleave (quarter_mode, op3,
44532 &ops [(n >> 1) | (n >> 2)], n >> 3);
44533 emit_insn (gen_rtx_SET (VOIDmode, op4,
44534 gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44535 emit_insn (gen_rtx_SET (VOIDmode, op5,
44536 gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44537 emit_insn (gen_rtx_SET (VOIDmode, target,
44538 gen_rtx_VEC_CONCAT (mode, op4, op5)));
44539 return;
44541 case V16QImode:
44542 if (!TARGET_SSE4_1)
44543 break;
44544 /* FALLTHRU */
44546 case V8HImode:
44547 if (!TARGET_SSE2)
44548 break;
44550 /* Don't use ix86_expand_vector_init_interleave if we can't
44551 move from GPR to SSE register directly. */
44552 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44553 break;
44555 n = GET_MODE_NUNITS (mode);
44556 for (i = 0; i < n; i++)
44557 ops[i] = XVECEXP (vals, 0, i);
44558 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44559 return;
44561 case V4HImode:
44562 case V8QImode:
44563 break;
44565 default:
44566 gcc_unreachable ();
44570 int i, j, n_elts, n_words, n_elt_per_word;
44571 machine_mode inner_mode;
44572 rtx words[4], shift;
44574 inner_mode = GET_MODE_INNER (mode);
44575 n_elts = GET_MODE_NUNITS (mode);
44576 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44577 n_elt_per_word = n_elts / n_words;
44578 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44580 for (i = 0; i < n_words; ++i)
44582 rtx word = NULL_RTX;
44584 for (j = 0; j < n_elt_per_word; ++j)
44586 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44587 elt = convert_modes (word_mode, inner_mode, elt, true);
44589 if (j == 0)
44590 word = elt;
44591 else
44593 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44594 word, 1, OPTAB_LIB_WIDEN);
44595 word = expand_simple_binop (word_mode, IOR, word, elt,
44596 word, 1, OPTAB_LIB_WIDEN);
44600 words[i] = word;
44603 if (n_words == 1)
44604 emit_move_insn (target, gen_lowpart (mode, words[0]));
44605 else if (n_words == 2)
44607 rtx tmp = gen_reg_rtx (mode);
44608 emit_clobber (tmp);
44609 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44610 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44611 emit_move_insn (target, tmp);
44613 else if (n_words == 4)
44615 rtx tmp = gen_reg_rtx (V4SImode);
44616 gcc_assert (word_mode == SImode);
44617 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44618 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44619 emit_move_insn (target, gen_lowpart (mode, tmp));
44621 else
44622 gcc_unreachable ();
44626 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44627 instructions unless MMX_OK is true. */
44629 void
44630 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44632 machine_mode mode = GET_MODE (target);
44633 machine_mode inner_mode = GET_MODE_INNER (mode);
44634 int n_elts = GET_MODE_NUNITS (mode);
44635 int n_var = 0, one_var = -1;
44636 bool all_same = true, all_const_zero = true;
44637 int i;
44638 rtx x;
44640 for (i = 0; i < n_elts; ++i)
44642 x = XVECEXP (vals, 0, i);
44643 if (!(CONST_INT_P (x)
44644 || GET_CODE (x) == CONST_DOUBLE
44645 || GET_CODE (x) == CONST_FIXED))
44646 n_var++, one_var = i;
44647 else if (x != CONST0_RTX (inner_mode))
44648 all_const_zero = false;
44649 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44650 all_same = false;
44653 /* Constants are best loaded from the constant pool. */
44654 if (n_var == 0)
44656 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44657 return;
44660 /* If all values are identical, broadcast the value. */
44661 if (all_same
44662 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44663 XVECEXP (vals, 0, 0)))
44664 return;
44666 /* Values where only one field is non-constant are best loaded from
44667 the pool and overwritten via move later. */
44668 if (n_var == 1)
44670 if (all_const_zero
44671 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44672 XVECEXP (vals, 0, one_var),
44673 one_var))
44674 return;
44676 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44677 return;
44680 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
44683 void
44684 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44686 machine_mode mode = GET_MODE (target);
44687 machine_mode inner_mode = GET_MODE_INNER (mode);
44688 machine_mode half_mode;
44689 bool use_vec_merge = false;
44690 rtx tmp;
44691 static rtx (*gen_extract[6][2]) (rtx, rtx)
44693 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44694 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44695 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44696 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44697 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44698 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44700 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44702 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44703 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44704 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44705 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44706 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44707 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44709 int i, j, n;
44711 switch (mode)
44713 case V2SFmode:
44714 case V2SImode:
44715 if (mmx_ok)
44717 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44718 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44719 if (elt == 0)
44720 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44721 else
44722 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44723 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44724 return;
44726 break;
44728 case V2DImode:
44729 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44730 if (use_vec_merge)
44731 break;
44733 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44734 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44735 if (elt == 0)
44736 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44737 else
44738 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44739 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44740 return;
44742 case V2DFmode:
44744 rtx op0, op1;
44746 /* For the two element vectors, we implement a VEC_CONCAT with
44747 the extraction of the other element. */
44749 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44750 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44752 if (elt == 0)
44753 op0 = val, op1 = tmp;
44754 else
44755 op0 = tmp, op1 = val;
44757 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44758 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44760 return;
44762 case V4SFmode:
44763 use_vec_merge = TARGET_SSE4_1;
44764 if (use_vec_merge)
44765 break;
44767 switch (elt)
44769 case 0:
44770 use_vec_merge = true;
44771 break;
44773 case 1:
44774 /* tmp = target = A B C D */
44775 tmp = copy_to_reg (target);
44776 /* target = A A B B */
44777 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44778 /* target = X A B B */
44779 ix86_expand_vector_set (false, target, val, 0);
44780 /* target = A X C D */
44781 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44782 const1_rtx, const0_rtx,
44783 GEN_INT (2+4), GEN_INT (3+4)));
44784 return;
44786 case 2:
44787 /* tmp = target = A B C D */
44788 tmp = copy_to_reg (target);
44789 /* tmp = X B C D */
44790 ix86_expand_vector_set (false, tmp, val, 0);
44791 /* target = A B X D */
44792 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44793 const0_rtx, const1_rtx,
44794 GEN_INT (0+4), GEN_INT (3+4)));
44795 return;
44797 case 3:
44798 /* tmp = target = A B C D */
44799 tmp = copy_to_reg (target);
44800 /* tmp = X B C D */
44801 ix86_expand_vector_set (false, tmp, val, 0);
44802 /* target = A B C X */
44803 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44804 const0_rtx, const1_rtx,
44805 GEN_INT (2+4), GEN_INT (0+4)));
44806 return;
44808 default:
44809 gcc_unreachable ();
44811 break;
44813 case V4SImode:
44814 use_vec_merge = TARGET_SSE4_1;
44815 if (use_vec_merge)
44816 break;
44818 /* Element 0 handled by vec_merge below. */
44819 if (elt == 0)
44821 use_vec_merge = true;
44822 break;
44825 if (TARGET_SSE2)
44827 /* With SSE2, use integer shuffles to swap element 0 and ELT,
44828 store into element 0, then shuffle them back. */
44830 rtx order[4];
44832 order[0] = GEN_INT (elt);
44833 order[1] = const1_rtx;
44834 order[2] = const2_rtx;
44835 order[3] = GEN_INT (3);
44836 order[elt] = const0_rtx;
44838 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44839 order[1], order[2], order[3]));
44841 ix86_expand_vector_set (false, target, val, 0);
44843 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44844 order[1], order[2], order[3]));
44846 else
44848 /* For SSE1, we have to reuse the V4SF code. */
44849 rtx t = gen_reg_rtx (V4SFmode);
44850 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
44851 emit_move_insn (target, gen_lowpart (mode, t));
44853 return;
44855 case V8HImode:
44856 use_vec_merge = TARGET_SSE2;
44857 break;
44858 case V4HImode:
44859 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44860 break;
44862 case V16QImode:
44863 use_vec_merge = TARGET_SSE4_1;
44864 break;
44866 case V8QImode:
44867 break;
44869 case V32QImode:
44870 half_mode = V16QImode;
44871 j = 0;
44872 n = 16;
44873 goto half;
44875 case V16HImode:
44876 half_mode = V8HImode;
44877 j = 1;
44878 n = 8;
44879 goto half;
44881 case V8SImode:
44882 half_mode = V4SImode;
44883 j = 2;
44884 n = 4;
44885 goto half;
44887 case V4DImode:
44888 half_mode = V2DImode;
44889 j = 3;
44890 n = 2;
44891 goto half;
44893 case V8SFmode:
44894 half_mode = V4SFmode;
44895 j = 4;
44896 n = 4;
44897 goto half;
44899 case V4DFmode:
44900 half_mode = V2DFmode;
44901 j = 5;
44902 n = 2;
44903 goto half;
44905 half:
44906 /* Compute offset. */
44907 i = elt / n;
44908 elt %= n;
44910 gcc_assert (i <= 1);
44912 /* Extract the half. */
44913 tmp = gen_reg_rtx (half_mode);
44914 emit_insn (gen_extract[j][i] (tmp, target));
44916 /* Put val in tmp at elt. */
44917 ix86_expand_vector_set (false, tmp, val, elt);
44919 /* Put it back. */
44920 emit_insn (gen_insert[j][i] (target, target, tmp));
44921 return;
44923 case V8DFmode:
44924 if (TARGET_AVX512F)
44926 tmp = gen_reg_rtx (mode);
44927 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44928 gen_rtx_VEC_DUPLICATE (mode, val)));
44929 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
44930 force_reg (QImode, GEN_INT (1 << elt))));
44931 return;
44933 else
44934 break;
44935 case V8DImode:
44936 if (TARGET_AVX512F)
44938 tmp = gen_reg_rtx (mode);
44939 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44940 gen_rtx_VEC_DUPLICATE (mode, val)));
44941 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
44942 force_reg (QImode, GEN_INT (1 << elt))));
44943 return;
44945 else
44946 break;
44947 case V16SFmode:
44948 if (TARGET_AVX512F)
44950 tmp = gen_reg_rtx (mode);
44951 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44952 gen_rtx_VEC_DUPLICATE (mode, val)));
44953 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
44954 force_reg (HImode, GEN_INT (1 << elt))));
44955 return;
44957 else
44958 break;
44959 case V16SImode:
44960 if (TARGET_AVX512F)
44962 tmp = gen_reg_rtx (mode);
44963 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44964 gen_rtx_VEC_DUPLICATE (mode, val)));
44965 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
44966 force_reg (HImode, GEN_INT (1 << elt))));
44967 return;
44969 else
44970 break;
44971 case V32HImode:
44972 if (TARGET_AVX512F && TARGET_AVX512BW)
44974 tmp = gen_reg_rtx (mode);
44975 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44976 gen_rtx_VEC_DUPLICATE (mode, val)));
44977 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
44978 force_reg (SImode, GEN_INT (1 << elt))));
44979 return;
44981 else
44982 break;
44983 case V64QImode:
44984 if (TARGET_AVX512F && TARGET_AVX512BW)
44986 tmp = gen_reg_rtx (mode);
44987 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44988 gen_rtx_VEC_DUPLICATE (mode, val)));
44989 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
44990 force_reg (DImode, GEN_INT (1 << elt))));
44991 return;
44993 else
44994 break;
44996 default:
44997 break;
45000 if (use_vec_merge)
45002 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
45003 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
45004 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45006 else
45008 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45010 emit_move_insn (mem, target);
45012 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45013 emit_move_insn (tmp, val);
45015 emit_move_insn (target, mem);
45019 void
45020 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
45022 machine_mode mode = GET_MODE (vec);
45023 machine_mode inner_mode = GET_MODE_INNER (mode);
45024 bool use_vec_extr = false;
45025 rtx tmp;
45027 switch (mode)
45029 case V2SImode:
45030 case V2SFmode:
45031 if (!mmx_ok)
45032 break;
45033 /* FALLTHRU */
45035 case V2DFmode:
45036 case V2DImode:
45037 use_vec_extr = true;
45038 break;
45040 case V4SFmode:
45041 use_vec_extr = TARGET_SSE4_1;
45042 if (use_vec_extr)
45043 break;
45045 switch (elt)
45047 case 0:
45048 tmp = vec;
45049 break;
45051 case 1:
45052 case 3:
45053 tmp = gen_reg_rtx (mode);
45054 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
45055 GEN_INT (elt), GEN_INT (elt),
45056 GEN_INT (elt+4), GEN_INT (elt+4)));
45057 break;
45059 case 2:
45060 tmp = gen_reg_rtx (mode);
45061 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
45062 break;
45064 default:
45065 gcc_unreachable ();
45067 vec = tmp;
45068 use_vec_extr = true;
45069 elt = 0;
45070 break;
45072 case V4SImode:
45073 use_vec_extr = TARGET_SSE4_1;
45074 if (use_vec_extr)
45075 break;
45077 if (TARGET_SSE2)
45079 switch (elt)
45081 case 0:
45082 tmp = vec;
45083 break;
45085 case 1:
45086 case 3:
45087 tmp = gen_reg_rtx (mode);
45088 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
45089 GEN_INT (elt), GEN_INT (elt),
45090 GEN_INT (elt), GEN_INT (elt)));
45091 break;
45093 case 2:
45094 tmp = gen_reg_rtx (mode);
45095 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
45096 break;
45098 default:
45099 gcc_unreachable ();
45101 vec = tmp;
45102 use_vec_extr = true;
45103 elt = 0;
45105 else
45107 /* For SSE1, we have to reuse the V4SF code. */
45108 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
45109 gen_lowpart (V4SFmode, vec), elt);
45110 return;
45112 break;
45114 case V8HImode:
45115 use_vec_extr = TARGET_SSE2;
45116 break;
45117 case V4HImode:
45118 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45119 break;
45121 case V16QImode:
45122 use_vec_extr = TARGET_SSE4_1;
45123 break;
45125 case V8SFmode:
45126 if (TARGET_AVX)
45128 tmp = gen_reg_rtx (V4SFmode);
45129 if (elt < 4)
45130 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
45131 else
45132 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
45133 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45134 return;
45136 break;
45138 case V4DFmode:
45139 if (TARGET_AVX)
45141 tmp = gen_reg_rtx (V2DFmode);
45142 if (elt < 2)
45143 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
45144 else
45145 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
45146 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45147 return;
45149 break;
45151 case V32QImode:
45152 if (TARGET_AVX)
45154 tmp = gen_reg_rtx (V16QImode);
45155 if (elt < 16)
45156 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45157 else
45158 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45159 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45160 return;
45162 break;
45164 case V16HImode:
45165 if (TARGET_AVX)
45167 tmp = gen_reg_rtx (V8HImode);
45168 if (elt < 8)
45169 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45170 else
45171 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45172 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45173 return;
45175 break;
45177 case V8SImode:
45178 if (TARGET_AVX)
45180 tmp = gen_reg_rtx (V4SImode);
45181 if (elt < 4)
45182 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45183 else
45184 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45185 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45186 return;
45188 break;
45190 case V4DImode:
45191 if (TARGET_AVX)
45193 tmp = gen_reg_rtx (V2DImode);
45194 if (elt < 2)
45195 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45196 else
45197 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45198 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45199 return;
45201 break;
45203 case V32HImode:
45204 if (TARGET_AVX512BW)
45206 tmp = gen_reg_rtx (V16HImode);
45207 if (elt < 16)
45208 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45209 else
45210 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45211 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45212 return;
45214 break;
45216 case V64QImode:
45217 if (TARGET_AVX512BW)
45219 tmp = gen_reg_rtx (V32QImode);
45220 if (elt < 32)
45221 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45222 else
45223 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45224 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45225 return;
45227 break;
45229 case V16SFmode:
45230 tmp = gen_reg_rtx (V8SFmode);
45231 if (elt < 8)
45232 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45233 else
45234 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45235 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45236 return;
45238 case V8DFmode:
45239 tmp = gen_reg_rtx (V4DFmode);
45240 if (elt < 4)
45241 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45242 else
45243 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45244 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45245 return;
45247 case V16SImode:
45248 tmp = gen_reg_rtx (V8SImode);
45249 if (elt < 8)
45250 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45251 else
45252 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45253 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45254 return;
45256 case V8DImode:
45257 tmp = gen_reg_rtx (V4DImode);
45258 if (elt < 4)
45259 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45260 else
45261 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45262 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45263 return;
45265 case V8QImode:
45266 /* ??? Could extract the appropriate HImode element and shift. */
45267 default:
45268 break;
45271 if (use_vec_extr)
45273 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45274 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45276 /* Let the rtl optimizers know about the zero extension performed. */
45277 if (inner_mode == QImode || inner_mode == HImode)
45279 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45280 target = gen_lowpart (SImode, target);
45283 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45285 else
45287 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45289 emit_move_insn (mem, vec);
45291 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45292 emit_move_insn (target, tmp);
45296 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45297 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45298 The upper bits of DEST are undefined, though they shouldn't cause
45299 exceptions (some bits from src or all zeros are ok). */
45301 static void
45302 emit_reduc_half (rtx dest, rtx src, int i)
45304 rtx tem, d = dest;
45305 switch (GET_MODE (src))
45307 case V4SFmode:
45308 if (i == 128)
45309 tem = gen_sse_movhlps (dest, src, src);
45310 else
45311 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45312 GEN_INT (1 + 4), GEN_INT (1 + 4));
45313 break;
45314 case V2DFmode:
45315 tem = gen_vec_interleave_highv2df (dest, src, src);
45316 break;
45317 case V16QImode:
45318 case V8HImode:
45319 case V4SImode:
45320 case V2DImode:
45321 d = gen_reg_rtx (V1TImode);
45322 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45323 GEN_INT (i / 2));
45324 break;
45325 case V8SFmode:
45326 if (i == 256)
45327 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45328 else
45329 tem = gen_avx_shufps256 (dest, src, src,
45330 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45331 break;
45332 case V4DFmode:
45333 if (i == 256)
45334 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45335 else
45336 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45337 break;
45338 case V32QImode:
45339 case V16HImode:
45340 case V8SImode:
45341 case V4DImode:
45342 if (i == 256)
45344 if (GET_MODE (dest) != V4DImode)
45345 d = gen_reg_rtx (V4DImode);
45346 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45347 gen_lowpart (V4DImode, src),
45348 const1_rtx);
45350 else
45352 d = gen_reg_rtx (V2TImode);
45353 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
45354 GEN_INT (i / 2));
45356 break;
45357 case V64QImode:
45358 case V32HImode:
45359 case V16SImode:
45360 case V16SFmode:
45361 case V8DImode:
45362 case V8DFmode:
45363 if (i > 128)
45364 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45365 gen_lowpart (V16SImode, src),
45366 gen_lowpart (V16SImode, src),
45367 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45368 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45369 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45370 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45371 GEN_INT (0xC), GEN_INT (0xD),
45372 GEN_INT (0xE), GEN_INT (0xF),
45373 GEN_INT (0x10), GEN_INT (0x11),
45374 GEN_INT (0x12), GEN_INT (0x13),
45375 GEN_INT (0x14), GEN_INT (0x15),
45376 GEN_INT (0x16), GEN_INT (0x17));
45377 else
45378 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45379 gen_lowpart (V16SImode, src),
45380 GEN_INT (i == 128 ? 0x2 : 0x1),
45381 GEN_INT (0x3),
45382 GEN_INT (0x3),
45383 GEN_INT (0x3),
45384 GEN_INT (i == 128 ? 0x6 : 0x5),
45385 GEN_INT (0x7),
45386 GEN_INT (0x7),
45387 GEN_INT (0x7),
45388 GEN_INT (i == 128 ? 0xA : 0x9),
45389 GEN_INT (0xB),
45390 GEN_INT (0xB),
45391 GEN_INT (0xB),
45392 GEN_INT (i == 128 ? 0xE : 0xD),
45393 GEN_INT (0xF),
45394 GEN_INT (0xF),
45395 GEN_INT (0xF));
45396 break;
45397 default:
45398 gcc_unreachable ();
45400 emit_insn (tem);
45401 if (d != dest)
45402 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45405 /* Expand a vector reduction. FN is the binary pattern to reduce;
45406 DEST is the destination; IN is the input vector. */
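/* As a rough scalar sketch, a V4SFmode maximum reduction performs
     t    = fn (in, in with bits 64..127 moved down to 0..63);
     dest = fn (t,  t  with bits 32..63  moved down to 0..31);
   i.e. the live part of the vector is folded in half on each iteration
   until element 0 of DEST holds the reduced value.  */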
45408 void
45409 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45411 rtx half, dst, vec = in;
45412 machine_mode mode = GET_MODE (in);
45413 int i;
45415 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
45416 if (TARGET_SSE4_1
45417 && mode == V8HImode
45418 && fn == gen_uminv8hi3)
45420 emit_insn (gen_sse4_1_phminposuw (dest, in));
45421 return;
45424 for (i = GET_MODE_BITSIZE (mode);
45425 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45426 i >>= 1)
45428 half = gen_reg_rtx (mode);
45429 emit_reduc_half (half, vec, i);
45430 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45431 dst = dest;
45432 else
45433 dst = gen_reg_rtx (mode);
45434 emit_insn (fn (dst, half, vec));
45435 vec = dst;
45439 /* Target hook for scalar_mode_supported_p. */
45440 static bool
45441 ix86_scalar_mode_supported_p (machine_mode mode)
45443 if (DECIMAL_FLOAT_MODE_P (mode))
45444 return default_decimal_float_supported_p ();
45445 else if (mode == TFmode)
45446 return true;
45447 else
45448 return default_scalar_mode_supported_p (mode);
45451 /* Implements target hook vector_mode_supported_p. */
45452 static bool
45453 ix86_vector_mode_supported_p (machine_mode mode)
45455 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45456 return true;
45457 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45458 return true;
45459 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45460 return true;
45461 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45462 return true;
45463 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45464 return true;
45465 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45466 return true;
45467 return false;
45470 /* Implement target hook libgcc_floating_mode_supported_p. */
45471 static bool
45472 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45474 switch (mode)
45476 case SFmode:
45477 case DFmode:
45478 case XFmode:
45479 return true;
45481 case TFmode:
45482 #ifdef IX86_NO_LIBGCC_TFMODE
45483 return false;
45484 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45485 return TARGET_LONG_DOUBLE_128;
45486 #else
45487 return true;
45488 #endif
45490 default:
45491 return false;
45495 /* Target hook for c_mode_for_suffix. */
45496 static machine_mode
45497 ix86_c_mode_for_suffix (char suffix)
45499 if (suffix == 'q')
45500 return TFmode;
45501 if (suffix == 'w')
45502 return XFmode;
45504 return VOIDmode;
45507 /* Worker function for TARGET_MD_ASM_CLOBBERS.
45509 We do this in the new i386 backend to maintain source compatibility
45510 with the old cc0-based compiler. */
45512 static tree
45513 ix86_md_asm_clobbers (tree, tree, tree clobbers)
45515 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
45516 clobbers);
45517 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
45518 clobbers);
45519 return clobbers;
45522 /* Implements target vector targetm.asm.encode_section_info. */
45524 static void ATTRIBUTE_UNUSED
45525 ix86_encode_section_info (tree decl, rtx rtl, int first)
45527 default_encode_section_info (decl, rtl, first);
45529 if (ix86_in_large_data_p (decl))
45530 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45533 /* Worker function for REVERSE_CONDITION. */
45535 enum rtx_code
45536 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45538 return (mode != CCFPmode && mode != CCFPUmode
45539 ? reverse_condition (code)
45540 : reverse_condition_maybe_unordered (code));
45543 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45544 to OPERANDS[0]. */
45546 const char *
45547 output_387_reg_move (rtx insn, rtx *operands)
45549 if (REG_P (operands[0]))
45551 if (REG_P (operands[1])
45552 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45554 if (REGNO (operands[0]) == FIRST_STACK_REG)
45555 return output_387_ffreep (operands, 0);
45556 return "fstp\t%y0";
45558 if (STACK_TOP_P (operands[0]))
45559 return "fld%Z1\t%y1";
45560 return "fst\t%y0";
45562 else if (MEM_P (operands[0]))
45564 gcc_assert (REG_P (operands[1]));
45565 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45566 return "fstp%Z0\t%y0";
45567 else
45569 /* There is no non-popping store to memory for XFmode.
45570 So if we need one, follow the store with a load. */
45571 if (GET_MODE (operands[0]) == XFmode)
45572 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45573 else
45574 return "fst%Z0\t%y0";
45577 else
45578 gcc_unreachable();
45581 /* Output code to perform a conditional jump to LABEL, if C2 flag in
45582 FP status register is set. */
45584 void
45585 ix86_emit_fp_unordered_jump (rtx label)
45587 rtx reg = gen_reg_rtx (HImode);
45588 rtx temp;
45590 emit_insn (gen_x86_fnstsw_1 (reg));
45592 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45594 emit_insn (gen_x86_sahf_1 (reg));
45596 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45597 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45599 else
45601 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45603 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45604 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45607 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45608 gen_rtx_LABEL_REF (VOIDmode, label),
45609 pc_rtx);
45610 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
45612 emit_jump_insn (temp);
45613 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45616 /* Output code to perform a log1p XFmode calculation. */
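/* The threshold below is 1 - sqrt(2)/2 ~= 0.2928932..., the documented
   domain limit of fyl2xp1.  Roughly:
     if (fabs (x) < 1 - sqrt (2) / 2)
       use fyl2xp1:  ln(2) * log2 (x + 1),  accurate for small x;
     else
       form 1.0 + x and use fyl2x:  ln(2) * log2 (1.0 + x).
   Either way the result is log1p (x).  A sketch of the sequence below.  */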
45618 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45620 rtx_code_label *label1 = gen_label_rtx ();
45621 rtx_code_label *label2 = gen_label_rtx ();
45623 rtx tmp = gen_reg_rtx (XFmode);
45624 rtx tmp2 = gen_reg_rtx (XFmode);
45625 rtx test;
45627 emit_insn (gen_absxf2 (tmp, op1));
45628 test = gen_rtx_GE (VOIDmode, tmp,
45629 CONST_DOUBLE_FROM_REAL_VALUE (
45630 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45631 XFmode));
45632 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
45634 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45635 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45636 emit_jump (label2);
45638 emit_label (label1);
45639 emit_move_insn (tmp, CONST1_RTX (XFmode));
45640 emit_insn (gen_addxf3 (tmp, op1, tmp));
45641 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45642 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45644 emit_label (label2);
45647 /* Emit code for round calculation. */
45648 void ix86_emit_i387_round (rtx op0, rtx op1)
45650 machine_mode inmode = GET_MODE (op1);
45651 machine_mode outmode = GET_MODE (op0);
45652 rtx e1, e2, res, tmp, tmp1, half;
45653 rtx scratch = gen_reg_rtx (HImode);
45654 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45655 rtx_code_label *jump_label = gen_label_rtx ();
45656 rtx insn;
45657 rtx (*gen_abs) (rtx, rtx);
45658 rtx (*gen_neg) (rtx, rtx);
45660 switch (inmode)
45662 case SFmode:
45663 gen_abs = gen_abssf2;
45664 break;
45665 case DFmode:
45666 gen_abs = gen_absdf2;
45667 break;
45668 case XFmode:
45669 gen_abs = gen_absxf2;
45670 break;
45671 default:
45672 gcc_unreachable ();
45675 switch (outmode)
45677 case SFmode:
45678 gen_neg = gen_negsf2;
45679 break;
45680 case DFmode:
45681 gen_neg = gen_negdf2;
45682 break;
45683 case XFmode:
45684 gen_neg = gen_negxf2;
45685 break;
45686 case HImode:
45687 gen_neg = gen_neghi2;
45688 break;
45689 case SImode:
45690 gen_neg = gen_negsi2;
45691 break;
45692 case DImode:
45693 gen_neg = gen_negdi2;
45694 break;
45695 default:
45696 gcc_unreachable ();
45699 e1 = gen_reg_rtx (inmode);
45700 e2 = gen_reg_rtx (inmode);
45701 res = gen_reg_rtx (outmode);
45703 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45705 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
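/* In plain C this is roughly
     r = floor (fabs (a) + 0.5);
     return signbit (a) ? -r : r;
   with the sign taken from the fxam status bits so that negative inputs
   (including -0.0) get a negated result.  A sketch of the sequence below.  */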
45707 /* scratch = fxam(op1) */
45708 emit_insn (gen_rtx_SET (VOIDmode, scratch,
45709 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45710 UNSPEC_FXAM)));
45711 /* e1 = fabs(op1) */
45712 emit_insn (gen_abs (e1, op1));
45714 /* e2 = e1 + 0.5 */
45715 half = force_reg (inmode, half);
45716 emit_insn (gen_rtx_SET (VOIDmode, e2,
45717 gen_rtx_PLUS (inmode, e1, half)));
45719 /* res = floor(e2) */
45720 if (inmode != XFmode)
45722 tmp1 = gen_reg_rtx (XFmode);
45724 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
45725 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
45727 else
45728 tmp1 = e2;
45730 switch (outmode)
45732 case SFmode:
45733 case DFmode:
45735 rtx tmp0 = gen_reg_rtx (XFmode);
45737 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
45739 emit_insn (gen_rtx_SET (VOIDmode, res,
45740 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
45741 UNSPEC_TRUNC_NOOP)));
45743 break;
45744 case XFmode:
45745 emit_insn (gen_frndintxf2_floor (res, tmp1));
45746 break;
45747 case HImode:
45748 emit_insn (gen_lfloorxfhi2 (res, tmp1));
45749 break;
45750 case SImode:
45751 emit_insn (gen_lfloorxfsi2 (res, tmp1));
45752 break;
45753 case DImode:
45754 emit_insn (gen_lfloorxfdi2 (res, tmp1));
45755 break;
45756 default:
45757 gcc_unreachable ();
45760 /* flags = signbit(a) */
45761 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
45763 /* if (flags) then res = -res */
45764 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
45765 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
45766 gen_rtx_LABEL_REF (VOIDmode, jump_label),
45767 pc_rtx);
45768 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45769 predict_jump (REG_BR_PROB_BASE * 50 / 100);
45770 JUMP_LABEL (insn) = jump_label;
45772 emit_insn (gen_neg (res, res));
45774 emit_label (jump_label);
45775 LABEL_NUSES (jump_label) = 1;
45777 emit_move_insn (op0, res);
45780 /* Output code to perform a Newton-Raphson approximation of a single precision
45781 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
45783 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
45785 rtx x0, x1, e0, e1;
45787 x0 = gen_reg_rtx (mode);
45788 e0 = gen_reg_rtx (mode);
45789 e1 = gen_reg_rtx (mode);
45790 x1 = gen_reg_rtx (mode);
45792 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
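/* In scalar terms this is one Newton-Raphson step applied to the
   hardware reciprocal estimate, roughly:
     x0 = rcp (b);                  approximate 1/b from RCPPS / RCP14
     x1 = x0 + x0 - b * x0 * x0;    equivalent to x0 * (2 - b * x0)
     result = a * x1;
   A sketch of the RTL emitted below.  */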
45794 b = force_reg (mode, b);
45796 /* x0 = rcp(b) estimate */
45797 if (mode == V16SFmode || mode == V8DFmode)
45798 emit_insn (gen_rtx_SET (VOIDmode, x0,
45799 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45800 UNSPEC_RCP14)));
45801 else
45802 emit_insn (gen_rtx_SET (VOIDmode, x0,
45803 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45804 UNSPEC_RCP)));
45806 /* e0 = x0 * b */
45807 emit_insn (gen_rtx_SET (VOIDmode, e0,
45808 gen_rtx_MULT (mode, x0, b)));
45810 /* e0 = x0 * e0 */
45811 emit_insn (gen_rtx_SET (VOIDmode, e0,
45812 gen_rtx_MULT (mode, x0, e0)));
45814 /* e1 = x0 + x0 */
45815 emit_insn (gen_rtx_SET (VOIDmode, e1,
45816 gen_rtx_PLUS (mode, x0, x0)));
45818 /* x1 = e1 - e0 */
45819 emit_insn (gen_rtx_SET (VOIDmode, x1,
45820 gen_rtx_MINUS (mode, e1, e0)));
45822 /* res = a * x1 */
45823 emit_insn (gen_rtx_SET (VOIDmode, res,
45824 gen_rtx_MULT (mode, a, x1)));
45827 /* Output code to perform a Newton-Raphson approximation of a
45828 single precision floating point [reciprocal] square root. */
45830 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
45831 bool recip)
45833 rtx x0, e0, e1, e2, e3, mthree, mhalf;
45834 REAL_VALUE_TYPE r;
45835 int unspec;
45837 x0 = gen_reg_rtx (mode);
45838 e0 = gen_reg_rtx (mode);
45839 e1 = gen_reg_rtx (mode);
45840 e2 = gen_reg_rtx (mode);
45841 e3 = gen_reg_rtx (mode);
45843 real_from_integer (&r, VOIDmode, -3, SIGNED);
45844 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45846 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45847 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45848 unspec = UNSPEC_RSQRT;
45850 if (VECTOR_MODE_P (mode))
45852 mthree = ix86_build_const_vector (mode, true, mthree);
45853 mhalf = ix86_build_const_vector (mode, true, mhalf);
45854 /* There is no 512-bit rsqrt. There is however rsqrt14. */
45855 if (GET_MODE_SIZE (mode) == 64)
45856 unspec = UNSPEC_RSQRT14;
45859 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45860 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
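/* With x0 the hardware estimate, the temporaries below compute
     e  = a * x0 * x0 - 3.0;
     rsqrt (a) ~= -0.5 * x0 * e;
     sqrt (a)  ~= -0.5 * (a * x0) * e;
   which is the usual refinement x1 = x0 * (3 - a * x0 * x0) / 2 written
   with the constants -3 and -0.5.  */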
45862 a = force_reg (mode, a);
45864 /* x0 = rsqrt(a) estimate */
45865 emit_insn (gen_rtx_SET (VOIDmode, x0,
45866 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45867 unspec)));
45869 /* If a == 0.0, filter out the infinite estimate to prevent NaN for sqrt(0.0). */
45870 if (!recip)
45872 rtx zero, mask;
45874 zero = gen_reg_rtx (mode);
45875 mask = gen_reg_rtx (mode);
45877 zero = force_reg (mode, CONST0_RTX(mode));
45879 /* Handle masked compare. */
45880 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
45882 mask = gen_reg_rtx (HImode);
45883 /* Imm value 0x4 corresponds to not-equal comparison. */
45884 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45885 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
45887 else
45889 emit_insn (gen_rtx_SET (VOIDmode, mask,
45890 gen_rtx_NE (mode, zero, a)));
45892 emit_insn (gen_rtx_SET (VOIDmode, x0,
45893 gen_rtx_AND (mode, x0, mask)));
45897 /* e0 = x0 * a */
45898 emit_insn (gen_rtx_SET (VOIDmode, e0,
45899 gen_rtx_MULT (mode, x0, a)));
45900 /* e1 = e0 * x0 */
45901 emit_insn (gen_rtx_SET (VOIDmode, e1,
45902 gen_rtx_MULT (mode, e0, x0)));
45904 /* e2 = e1 - 3. */
45905 mthree = force_reg (mode, mthree);
45906 emit_insn (gen_rtx_SET (VOIDmode, e2,
45907 gen_rtx_PLUS (mode, e1, mthree)));
45909 mhalf = force_reg (mode, mhalf);
45910 if (recip)
45911 /* e3 = -.5 * x0 */
45912 emit_insn (gen_rtx_SET (VOIDmode, e3,
45913 gen_rtx_MULT (mode, x0, mhalf)));
45914 else
45915 /* e3 = -.5 * e0 */
45916 emit_insn (gen_rtx_SET (VOIDmode, e3,
45917 gen_rtx_MULT (mode, e0, mhalf)));
45918 /* ret = e2 * e3 */
45919 emit_insn (gen_rtx_SET (VOIDmode, res,
45920 gen_rtx_MULT (mode, e2, e3)));
45923 #ifdef TARGET_SOLARIS
45924 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
45926 static void
45927 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45928 tree decl)
45930 /* With Binutils 2.15, the "@unwind" marker must be specified on
45931 every occurrence of the ".eh_frame" section, not just the first
45932 one. */
45933 if (TARGET_64BIT
45934 && strcmp (name, ".eh_frame") == 0)
45936 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45937 flags & SECTION_WRITE ? "aw" : "a");
45938 return;
45941 #ifndef USE_GAS
45942 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45944 solaris_elf_asm_comdat_section (name, flags, decl);
45945 return;
45947 #endif
45949 default_elf_asm_named_section (name, flags, decl);
45951 #endif /* TARGET_SOLARIS */
45953 /* Return the mangling of TYPE if it is an extended fundamental type. */
45955 static const char *
45956 ix86_mangle_type (const_tree type)
45958 type = TYPE_MAIN_VARIANT (type);
45960 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45961 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45962 return NULL;
45964 switch (TYPE_MODE (type))
45966 case TFmode:
45967 /* __float128 is "g". */
45968 return "g";
45969 case XFmode:
45970 /* "long double" or __float80 is "e". */
45971 return "e";
45972 default:
45973 return NULL;
45977 /* For 32-bit code we can save PIC register setup by using
45978 __stack_chk_fail_local hidden function instead of calling
45979 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
45980 register, so it is better to call __stack_chk_fail directly. */
45982 static tree ATTRIBUTE_UNUSED
45983 ix86_stack_protect_fail (void)
45985 return TARGET_64BIT
45986 ? default_external_stack_protect_fail ()
45987 : default_hidden_stack_protect_fail ();
45990 /* Select a format to encode pointers in exception handling data. CODE
45991 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
45992 true if the symbol may be affected by dynamic relocations.
45994 ??? All x86 object file formats are capable of representing this.
45995 After all, the relocation needed is the same as for the call insn.
45996 Whether or not a particular assembler allows us to enter such, I
45997 guess we'll have to see. */
45999 asm_preferred_eh_data_format (int code, int global)
46001 if (flag_pic)
46003 int type = DW_EH_PE_sdata8;
46004 if (!TARGET_64BIT
46005 || ix86_cmodel == CM_SMALL_PIC
46006 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
46007 type = DW_EH_PE_sdata4;
46008 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
46010 if (ix86_cmodel == CM_SMALL
46011 || (ix86_cmodel == CM_MEDIUM && code))
46012 return DW_EH_PE_udata4;
46013 return DW_EH_PE_absptr;
46016 /* Expand copysign from SIGN to the positive value ABS_VALUE
46017 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
46018 the sign-bit. */
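/* Bitwise, the sequence below computes
     result = abs_value | (sign & SIGN_BIT_MASK);
   where the mask is either built here or derived (via NOT) from the
   inverted mask that ix86_expand_sse_fabs hands back.  SIGN_BIT_MASK
   stands for the mode's sign-bit constant in this sketch.  */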
46019 static void
46020 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
46022 machine_mode mode = GET_MODE (sign);
46023 rtx sgn = gen_reg_rtx (mode);
46024 if (mask == NULL_RTX)
46026 machine_mode vmode;
46028 if (mode == SFmode)
46029 vmode = V4SFmode;
46030 else if (mode == DFmode)
46031 vmode = V2DFmode;
46032 else
46033 vmode = mode;
46035 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
46036 if (!VECTOR_MODE_P (mode))
46038 /* We need to generate a scalar mode mask in this case. */
46039 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46040 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46041 mask = gen_reg_rtx (mode);
46042 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
46045 else
46046 mask = gen_rtx_NOT (mode, mask);
46047 emit_insn (gen_rtx_SET (VOIDmode, sgn,
46048 gen_rtx_AND (mode, mask, sign)));
46049 emit_insn (gen_rtx_SET (VOIDmode, result,
46050 gen_rtx_IOR (mode, abs_value, sgn)));
46053 /* Expand fabs (OP0) and return a new rtx that holds the result. The
46054 mask for masking out the sign-bit is stored in *SMASK, if that is
46055 non-null. */
46056 static rtx
46057 ix86_expand_sse_fabs (rtx op0, rtx *smask)
46059 machine_mode vmode, mode = GET_MODE (op0);
46060 rtx xa, mask;
46062 xa = gen_reg_rtx (mode);
46063 if (mode == SFmode)
46064 vmode = V4SFmode;
46065 else if (mode == DFmode)
46066 vmode = V2DFmode;
46067 else
46068 vmode = mode;
46069 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
46070 if (!VECTOR_MODE_P (mode))
46072 /* We need to generate a scalar mode mask in this case. */
46073 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46074 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46075 mask = gen_reg_rtx (mode);
46076 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
46078 emit_insn (gen_rtx_SET (VOIDmode, xa,
46079 gen_rtx_AND (mode, op0, mask)));
46081 if (smask)
46082 *smask = mask;
46084 return xa;
46087 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
46088 swapping the operands if SWAP_OPERANDS is true. The expanded
46089 code is a forward jump to a newly created label in case the
46090 comparison is true. The generated label rtx is returned. */
46091 static rtx_code_label *
46092 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
46093 bool swap_operands)
46095 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
46096 rtx_code_label *label;
46097 rtx tmp;
46099 if (swap_operands)
46100 std::swap (op0, op1);
46102 label = gen_label_rtx ();
46103 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
46104 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46105 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
46106 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
46107 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
46108 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
46109 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
46110 JUMP_LABEL (tmp) = label;
46112 return label;
46115 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
46116 using comparison code CODE. Operands are swapped for the comparison if
46117 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
46118 static rtx
46119 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
46120 bool swap_operands)
46122 rtx (*insn)(rtx, rtx, rtx, rtx);
46123 machine_mode mode = GET_MODE (op0);
46124 rtx mask = gen_reg_rtx (mode);
46126 if (swap_operands)
46127 std::swap (op0, op1);
46129 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
46131 emit_insn (insn (mask, op0, op1,
46132 gen_rtx_fmt_ee (code, mode, op0, op1)));
46133 return mask;
46136 /* Generate and return a rtx of mode MODE for 2**n where n is the number
46137 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
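/* The constant is 2**52 for DFmode and 2**23 for SFmode.  The rounding
   expanders below rely on the fact that, for 0 <= x < 2**52, the sum
   x + 2**52 has no fraction bits left, so (x + 2**52) - 2**52 is x
   rounded to an integer in the current rounding mode, e.g.
     (3.7 + 0x1p52) - 0x1p52 == 4.0  under round-to-nearest.  */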
46138 static rtx
46139 ix86_gen_TWO52 (machine_mode mode)
46141 REAL_VALUE_TYPE TWO52r;
46142 rtx TWO52;
46144 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
46145 TWO52 = const_double_from_real_value (TWO52r, mode);
46146 TWO52 = force_reg (mode, TWO52);
46148 return TWO52;
46151 /* Expand SSE sequence for computing lround from OP1 storing
46152 into OP0. */
46153 void
46154 ix86_expand_lround (rtx op0, rtx op1)
46156 /* C code for the stuff we're doing below:
46157 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
46158 return (long)tmp;
46160 machine_mode mode = GET_MODE (op1);
46161 const struct real_format *fmt;
46162 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46163 rtx adj;
46165 /* load nextafter (0.5, 0.0) */
46166 fmt = REAL_MODE_FORMAT (mode);
46167 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46168 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46170 /* adj = copysign (0.5, op1) */
46171 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46172 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46174 /* adj = op1 + adj */
46175 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
46177 /* op0 = (imode)adj */
46178 expand_fix (op0, adj, 0);
46181 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
46182 into OPERAND0. */
46183 void
46184 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46186 /* C code for the stuff we're doing below (for do_floor):
46187 xi = (long)op1;
46188 xi -= (double)xi > op1 ? 1 : 0;
46189 return xi;
46191 machine_mode fmode = GET_MODE (op1);
46192 machine_mode imode = GET_MODE (op0);
46193 rtx ireg, freg, tmp;
46194 rtx_code_label *label;
46196 /* reg = (long)op1 */
46197 ireg = gen_reg_rtx (imode);
46198 expand_fix (ireg, op1, 0);
46200 /* freg = (double)reg */
46201 freg = gen_reg_rtx (fmode);
46202 expand_float (freg, ireg, 0);
46204 /* ireg = (freg > op1) ? ireg - 1 : ireg */
46205 label = ix86_expand_sse_compare_and_jump (UNLE,
46206 freg, op1, !do_floor);
46207 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46208 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46209 emit_move_insn (ireg, tmp);
46211 emit_label (label);
46212 LABEL_NUSES (label) = 1;
46214 emit_move_insn (op0, ireg);
46217 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46218 result in OPERAND0. */
46219 void
46220 ix86_expand_rint (rtx operand0, rtx operand1)
46222 /* C code for the stuff we're doing below:
46223 xa = fabs (operand1);
46224 if (!isless (xa, 2**52))
46225 return operand1;
46226 xa = xa + 2**52 - 2**52;
46227 return copysign (xa, operand1);
46229 machine_mode mode = GET_MODE (operand0);
46230 rtx res, xa, TWO52, mask;
46231 rtx_code_label *label;
46233 res = gen_reg_rtx (mode);
46234 emit_move_insn (res, operand1);
46236 /* xa = abs (operand1) */
46237 xa = ix86_expand_sse_fabs (res, &mask);
46239 /* if (!isless (xa, TWO52)) goto label; */
46240 TWO52 = ix86_gen_TWO52 (mode);
46241 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
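/* Adding and then subtracting 2**52 (2**23 for SFmode) rounds XA to an
   integer in the current rounding mode, since at that magnitude the
   mantissa has no room left for a fraction.  Worked example for DFmode:
   3.7 + 2**52 rounds to 4503599627370500.0, and subtracting 2**52
   leaves 4.0.  */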
46243 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46244 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46246 ix86_sse_copysign_to_positive (res, xa, res, mask);
46248 emit_label (label);
46249 LABEL_NUSES (label) = 1;
46251 emit_move_insn (operand0, res);
46254 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46255 into OPERAND0. */
46256 void
46257 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46259 /* C code for the stuff we expand below.
46260 double xa = fabs (x), x2;
46261 if (!isless (xa, TWO52))
46262 return x;
46263 xa = xa + TWO52 - TWO52;
46264 x2 = copysign (xa, x);
46265 Compensate. Floor:
46266 if (x2 > x)
46267 x2 -= 1;
46268 Compensate. Ceil:
46269 if (x2 < x)
46270 x2 -= -1;
46271 return x2;
46273 machine_mode mode = GET_MODE (operand0);
46274 rtx xa, TWO52, tmp, one, res, mask;
46275 rtx_code_label *label;
46277 TWO52 = ix86_gen_TWO52 (mode);
46279 /* Temporary for holding the result, initialized to the input
46280 operand to ease control flow. */
46281 res = gen_reg_rtx (mode);
46282 emit_move_insn (res, operand1);
46284 /* xa = abs (operand1) */
46285 xa = ix86_expand_sse_fabs (res, &mask);
46287 /* if (!isless (xa, TWO52)) goto label; */
46288 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46290 /* xa = xa + TWO52 - TWO52; */
46291 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46292 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46294 /* xa = copysign (xa, operand1) */
46295 ix86_sse_copysign_to_positive (xa, xa, res, mask);
46297 /* generate 1.0 or -1.0 */
46298 one = force_reg (mode,
46299 const_double_from_real_value (do_floor
46300 ? dconst1 : dconstm1, mode));
46302 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46303 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46304 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46305 gen_rtx_AND (mode, one, tmp)));
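/* The compare mask is all-ones or all-zeros per element, so ANDing it
   with the bit pattern of 1.0 (or -1.0 for ceil) yields exactly
   1.0 / -1.0 or +0.0.  */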
46306 /* We always need to subtract here to preserve signed zero. */
46307 tmp = expand_simple_binop (mode, MINUS,
46308 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46309 emit_move_insn (res, tmp);
46311 emit_label (label);
46312 LABEL_NUSES (label) = 1;
46314 emit_move_insn (operand0, res);
46317 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46318 into OPERAND0. */
46319 void
46320 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46322 /* C code for the stuff we expand below.
46323 double xa = fabs (x), x2;
46324 if (!isless (xa, TWO52))
46325 return x;
46326 x2 = (double)(long)x;
46327 Compensate. Floor:
46328 if (x2 > x)
46329 x2 -= 1;
46330 Compensate. Ceil:
46331 if (x2 < x)
46332 x2 += 1;
46333 if (HONOR_SIGNED_ZEROS (mode))
46334 return copysign (x2, x);
46335 return x2;
46337 machine_mode mode = GET_MODE (operand0);
46338 rtx xa, xi, TWO52, tmp, one, res, mask;
46339 rtx_code_label *label;
46341 TWO52 = ix86_gen_TWO52 (mode);
46343 /* Temporary for holding the result, initialized to the input
46344 operand to ease control flow. */
46345 res = gen_reg_rtx (mode);
46346 emit_move_insn (res, operand1);
46348 /* xa = abs (operand1) */
46349 xa = ix86_expand_sse_fabs (res, &mask);
46351 /* if (!isless (xa, TWO52)) goto label; */
46352 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46354 /* xa = (double)(long)x */
46355 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46356 expand_fix (xi, res, 0);
46357 expand_float (xa, xi, 0);
46359 /* generate 1.0 */
46360 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46362 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46363 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46364 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46365 gen_rtx_AND (mode, one, tmp)));
46366 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46367 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46368 emit_move_insn (res, tmp);
46370 if (HONOR_SIGNED_ZEROS (mode))
46371 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46373 emit_label (label);
46374 LABEL_NUSES (label) = 1;
46376 emit_move_insn (operand0, res);
46379 /* Expand SSE sequence for computing round from OPERAND1 storing
46380 into OPERAND0. A sequence that works without relying on DImode truncation
46381 via cvttsd2siq, which is only available on 64-bit targets. */
46382 void
46383 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46385 /* C code for the stuff we expand below.
46386 double xa = fabs (x), xa2, x2;
46387 if (!isless (xa, TWO52))
46388 return x;
46389 Using the absolute value and copying back sign makes
46390 -0.0 -> -0.0 correct.
46391 xa2 = xa + TWO52 - TWO52;
46392 Compensate.
46393 dxa = xa2 - xa;
46394 if (dxa <= -0.5)
46395 xa2 += 1;
46396 else if (dxa > 0.5)
46397 xa2 -= 1;
46398 x2 = copysign (xa2, x);
46399 return x2;
46401 machine_mode mode = GET_MODE (operand0);
46402 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46403 rtx_code_label *label;
46405 TWO52 = ix86_gen_TWO52 (mode);
46407 /* Temporary for holding the result, initialized to the input
46408 operand to ease control flow. */
46409 res = gen_reg_rtx (mode);
46410 emit_move_insn (res, operand1);
46412 /* xa = abs (operand1) */
46413 xa = ix86_expand_sse_fabs (res, &mask);
46415 /* if (!isless (xa, TWO52)) goto label; */
46416 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46418 /* xa2 = xa + TWO52 - TWO52; */
46419 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46420 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46422 /* dxa = xa2 - xa; */
46423 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46425 /* generate 0.5, 1.0 and -0.5 */
46426 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46427 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46428 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46429 0, OPTAB_DIRECT);
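/* 1.0 is derived as 0.5 + 0.5 and -0.5 as 0.5 - 1.0, so only the
   single 0.5 constant above has to be materialized.  */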
46431 /* Compensate. */
46432 tmp = gen_reg_rtx (mode);
46433 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46434 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46435 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46436 gen_rtx_AND (mode, one, tmp)));
46437 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46438 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46439 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46440 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46441 gen_rtx_AND (mode, one, tmp)));
46442 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46444 /* res = copysign (xa2, operand1) */
46445 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46447 emit_label (label);
46448 LABEL_NUSES (label) = 1;
46450 emit_move_insn (operand0, res);
46453 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46454 into OPERAND0. */
46455 void
46456 ix86_expand_trunc (rtx operand0, rtx operand1)
46458 /* C code for SSE variant we expand below.
46459 double xa = fabs (x), x2;
46460 if (!isless (xa, TWO52))
46461 return x;
46462 x2 = (double)(long)x;
46463 if (HONOR_SIGNED_ZEROS (mode))
46464 return copysign (x2, x);
46465 return x2;
46467 machine_mode mode = GET_MODE (operand0);
46468 rtx xa, xi, TWO52, res, mask;
46469 rtx_code_label *label;
46471 TWO52 = ix86_gen_TWO52 (mode);
46473 /* Temporary for holding the result, initialized to the input
46474 operand to ease control flow. */
46475 res = gen_reg_rtx (mode);
46476 emit_move_insn (res, operand1);
46478 /* xa = abs (operand1) */
46479 xa = ix86_expand_sse_fabs (res, &mask);
46481 /* if (!isless (xa, TWO52)) goto label; */
46482 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46484 /* x = (double)(long)x */
46485 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46486 expand_fix (xi, res, 0);
46487 expand_float (res, xi, 0);
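/* For inputs in (-1, 0) the fix/float round trip yields +0.0; copying
   the sign back from the original operand restores -0.0 when signed
   zeros are honored.  */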
46489 if (HONOR_SIGNED_ZEROS (mode))
46490 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46492 emit_label (label);
46493 LABEL_NUSES (label) = 1;
46495 emit_move_insn (operand0, res);
46498 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46499 into OPERAND0 without relying on DImode truncation via cvttsd2siq. */
46500 void
46501 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46503 machine_mode mode = GET_MODE (operand0);
46504 rtx xa, mask, TWO52, one, res, smask, tmp;
46505 rtx_code_label *label;
46507 /* C code for SSE variant we expand below.
46508 double xa = fabs (x), x2;
46509 if (!isless (xa, TWO52))
46510 return x;
46511 xa2 = xa + TWO52 - TWO52;
46512 Compensate:
46513 if (xa2 > xa)
46514 xa2 -= 1.0;
46515 x2 = copysign (xa2, x);
46516 return x2;
46519 TWO52 = ix86_gen_TWO52 (mode);
46521 /* Temporary for holding the result, initialized to the input
46522 operand to ease control flow. */
46523 res = gen_reg_rtx (mode);
46524 emit_move_insn (res, operand1);
46526 /* xa = abs (operand1) */
46527 xa = ix86_expand_sse_fabs (res, &smask);
46529 /* if (!isless (xa, TWO52)) goto label; */
46530 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46532 /* res = xa + TWO52 - TWO52; */
46533 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46534 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46535 emit_move_insn (res, tmp);
46537 /* generate 1.0 */
46538 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46540 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46541 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46542 emit_insn (gen_rtx_SET (VOIDmode, mask,
46543 gen_rtx_AND (mode, mask, one)));
46544 tmp = expand_simple_binop (mode, MINUS,
46545 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46546 emit_move_insn (res, tmp);
46548 /* res = copysign (res, operand1) */
46549 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46551 emit_label (label);
46552 LABEL_NUSES (label) = 1;
46554 emit_move_insn (operand0, res);
46557 /* Expand SSE sequence for computing round from OPERAND1 storing
46558 into OPERAND0. */
46559 void
46560 ix86_expand_round (rtx operand0, rtx operand1)
46562 /* C code for the stuff we're doing below:
46563 double xa = fabs (x);
46564 if (!isless (xa, TWO52))
46565 return x;
46566 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46567 return copysign (xa, x);
46569 machine_mode mode = GET_MODE (operand0);
46570 rtx res, TWO52, xa, xi, half, mask;
46571 rtx_code_label *label;
46572 const struct real_format *fmt;
46573 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46575 /* Temporary for holding the result, initialized to the input
46576 operand to ease control flow. */
46577 res = gen_reg_rtx (mode);
46578 emit_move_insn (res, operand1);
46580 TWO52 = ix86_gen_TWO52 (mode);
46581 xa = ix86_expand_sse_fabs (res, &mask);
46582 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46584 /* load nextafter (0.5, 0.0) */
46585 fmt = REAL_MODE_FORMAT (mode);
46586 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46587 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46589 /* xa = xa + 0.5 */
46590 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46591 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46593 /* xa = (double)(int64_t)xa */
46594 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46595 expand_fix (xi, xa, 0);
46596 expand_float (xa, xi, 0);
46598 /* res = copysign (xa, operand1) */
46599 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46601 emit_label (label);
46602 LABEL_NUSES (label) = 1;
46604 emit_move_insn (operand0, res);
46607 /* Expand SSE sequence for computing round
46608 from OP1 storing into OP0 using sse4 round insn. */
46609 void
46610 ix86_expand_round_sse4 (rtx op0, rtx op1)
46612 machine_mode mode = GET_MODE (op0);
46613 rtx e1, e2, res, half;
46614 const struct real_format *fmt;
46615 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46616 rtx (*gen_copysign) (rtx, rtx, rtx);
46617 rtx (*gen_round) (rtx, rtx, rtx);
46619 switch (mode)
46621 case SFmode:
46622 gen_copysign = gen_copysignsf3;
46623 gen_round = gen_sse4_1_roundsf2;
46624 break;
46625 case DFmode:
46626 gen_copysign = gen_copysigndf3;
46627 gen_round = gen_sse4_1_rounddf2;
46628 break;
46629 default:
46630 gcc_unreachable ();
46633 /* round (a) = trunc (a + copysign (0.5, a)) */
46635 /* load nextafter (0.5, 0.0) */
46636 fmt = REAL_MODE_FORMAT (mode);
46637 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46638 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46639 half = const_double_from_real_value (pred_half, mode);
46641 /* e1 = copysign (0.5, op1) */
46642 e1 = gen_reg_rtx (mode);
46643 emit_insn (gen_copysign (e1, half, op1));
46645 /* e2 = op1 + e1 */
46646 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46648 /* res = trunc (e2) */
46649 res = gen_reg_rtx (mode);
46650 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
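/* The ROUND_TRUNC immediate selects truncation toward zero, so together
   with the copysign'ed 0.5 adjustment above this implements the
   round-half-away-from-zero semantics of round ().  */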
46652 emit_move_insn (op0, res);
46656 /* Table of valid machine attributes. */
46657 static const struct attribute_spec ix86_attribute_table[] =
46659 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46660 affects_type_identity } */
46661 /* Stdcall attribute says callee is responsible for popping arguments
46662 if they are not variable. */
46663 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46664 true },
46665 /* Fastcall attribute says callee is responsible for popping arguments
46666 if they are not variable. */
46667 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46668 true },
46669 /* Thiscall attribute says callee is responsible for popping arguments
46670 if they are not variable. */
46671 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46672 true },
46673 /* Cdecl attribute says the callee is a normal C declaration */
46674 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46675 true },
46676 /* Regparm attribute specifies how many integer arguments are to be
46677 passed in registers. */
46678 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
46679 true },
46680 /* Sseregparm attribute says we are using x86_64 calling conventions
46681 for FP arguments. */
46682 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46683 true },
46684 /* The transactional memory builtins are implicitly regparm or fastcall
46685 depending on the ABI. Override the generic do-nothing attribute that
46686 these builtins were declared with. */
46687 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46688 true },
46689 /* force_align_arg_pointer says this function realigns the stack at entry. */
46690 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46691 false, true, true, ix86_handle_cconv_attribute, false },
46692 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46693 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46694 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46695 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
46696 false },
46697 #endif
46698 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46699 false },
46700 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46701 false },
46702 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46703 SUBTARGET_ATTRIBUTE_TABLE,
46704 #endif
46705 /* ms_abi and sysv_abi calling convention function attributes. */
46706 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46707 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46708 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46709 false },
46710 { "callee_pop_aggregate_return", 1, 1, false, true, true,
46711 ix86_handle_callee_pop_aggregate_return, true },
46712 /* End element. */
46713 { NULL, 0, 0, false, false, false, NULL, false }
46716 /* Implement targetm.vectorize.builtin_vectorization_cost. */
46717 static int
46718 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46719 tree vectype, int)
46721 unsigned elements;
46723 switch (type_of_cost)
46725 case scalar_stmt:
46726 return ix86_cost->scalar_stmt_cost;
46728 case scalar_load:
46729 return ix86_cost->scalar_load_cost;
46731 case scalar_store:
46732 return ix86_cost->scalar_store_cost;
46734 case vector_stmt:
46735 return ix86_cost->vec_stmt_cost;
46737 case vector_load:
46738 return ix86_cost->vec_align_load_cost;
46740 case vector_store:
46741 return ix86_cost->vec_store_cost;
46743 case vec_to_scalar:
46744 return ix86_cost->vec_to_scalar_cost;
46746 case scalar_to_vec:
46747 return ix86_cost->scalar_to_vec_cost;
46749 case unaligned_load:
46750 case unaligned_store:
46751 return ix86_cost->vec_unalign_load_cost;
46753 case cond_branch_taken:
46754 return ix86_cost->cond_taken_branch_cost;
46756 case cond_branch_not_taken:
46757 return ix86_cost->cond_not_taken_branch_cost;
46759 case vec_perm:
46760 case vec_promote_demote:
46761 return ix86_cost->vec_stmt_cost;
46763 case vec_construct:
46764 elements = TYPE_VECTOR_SUBPARTS (vectype);
46765 return elements / 2 + 1;
46767 default:
46768 gcc_unreachable ();
46772 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46773 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46774 insn every time. */
46776 static GTY(()) rtx_insn *vselect_insn;
46778 /* Initialize vselect_insn. */
46780 static void
46781 init_vselect_insn (void)
46783 unsigned i;
46784 rtx x;
46786 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46787 for (i = 0; i < MAX_VECT_LEN; ++i)
46788 XVECEXP (x, 0, i) = const0_rtx;
46789 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46790 const0_rtx), x);
46791 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
46792 start_sequence ();
46793 vselect_insn = emit_insn (x);
46794 end_sequence ();
46797 /* Construct (set target (vec_select op0 (parallel perm))) and
46798 return true if that's a valid instruction in the active ISA. */
46800 static bool
46801 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46802 unsigned nelt, bool testing_p)
46804 unsigned int i;
46805 rtx x, save_vconcat;
46806 int icode;
46808 if (vselect_insn == NULL_RTX)
46809 init_vselect_insn ();
46811 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46812 PUT_NUM_ELEM (XVEC (x, 0), nelt);
46813 for (i = 0; i < nelt; ++i)
46814 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46815 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46816 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46817 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46818 SET_DEST (PATTERN (vselect_insn)) = target;
46819 icode = recog_memoized (vselect_insn);
46821 if (icode >= 0 && !testing_p)
46822 emit_insn (copy_rtx (PATTERN (vselect_insn)));
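/* Put the cached insn back into its neutral state; resetting INSN_CODE
   forces recog_memoized to recognize the next pattern from scratch.  */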
46824 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46825 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46826 INSN_CODE (vselect_insn) = -1;
46828 return icode >= 0;
46831 /* Similar, but generate a vec_concat from op0 and op1 as well. */
46833 static bool
46834 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46835 const unsigned char *perm, unsigned nelt,
46836 bool testing_p)
46838 machine_mode v2mode;
46839 rtx x;
46840 bool ok;
46842 if (vselect_insn == NULL_RTX)
46843 init_vselect_insn ();
46845 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
46846 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46847 PUT_MODE (x, v2mode);
46848 XEXP (x, 0) = op0;
46849 XEXP (x, 1) = op1;
46850 ok = expand_vselect (target, x, perm, nelt, testing_p);
46851 XEXP (x, 0) = const0_rtx;
46852 XEXP (x, 1) = const0_rtx;
46853 return ok;
46856 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46857 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
46859 static bool
46860 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46862 machine_mode vmode = d->vmode;
46863 unsigned i, mask, nelt = d->nelt;
46864 rtx target, op0, op1, x;
46865 rtx rperm[32], vperm;
46867 if (d->one_operand_p)
46868 return false;
46869 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46870 && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
46872 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46874 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46876 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46878 else
46879 return false;
46881 /* This is a blend, not a permute. Elements must stay in their
46882 respective lanes. */
46883 for (i = 0; i < nelt; ++i)
46885 unsigned e = d->perm[i];
46886 if (!(e == i || e == i + nelt))
46887 return false;
46890 if (d->testing_p)
46891 return true;
46893 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
46894 decision should be extracted elsewhere, so that we only try that
46895 sequence once all budget==3 options have been tried. */
46896 target = d->target;
46897 op0 = d->op0;
46898 op1 = d->op1;
46899 mask = 0;
46901 switch (vmode)
46903 case V8DFmode:
46904 case V16SFmode:
46905 case V4DFmode:
46906 case V8SFmode:
46907 case V2DFmode:
46908 case V4SFmode:
46909 case V8HImode:
46910 case V8SImode:
46911 case V32HImode:
46912 case V64QImode:
46913 case V16SImode:
46914 case V8DImode:
46915 for (i = 0; i < nelt; ++i)
46916 mask |= (d->perm[i] >= nelt) << i;
46917 break;
46919 case V2DImode:
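/* Each DImode element corresponds to four HImode elements, so each
   blend bit is replicated into a 4-bit group of the pblendw
   immediate.  */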
46920 for (i = 0; i < 2; ++i)
46921 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
46922 vmode = V8HImode;
46923 goto do_subreg;
46925 case V4SImode:
46926 for (i = 0; i < 4; ++i)
46927 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46928 vmode = V8HImode;
46929 goto do_subreg;
46931 case V16QImode:
46932 /* See if bytes move in pairs so we can use pblendw with
46933 an immediate argument, rather than pblendvb with a vector
46934 argument. */
46935 for (i = 0; i < 16; i += 2)
46936 if (d->perm[i] + 1 != d->perm[i + 1])
46938 use_pblendvb:
46939 for (i = 0; i < nelt; ++i)
46940 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46942 finish_pblendvb:
46943 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46944 vperm = force_reg (vmode, vperm);
46946 if (GET_MODE_SIZE (vmode) == 16)
46947 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46948 else
46949 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
46950 if (target != d->target)
46951 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46952 return true;
46955 for (i = 0; i < 8; ++i)
46956 mask |= (d->perm[i * 2] >= 16) << i;
46957 vmode = V8HImode;
46958 /* FALLTHRU */
46960 do_subreg:
46961 target = gen_reg_rtx (vmode);
46962 op0 = gen_lowpart (vmode, op0);
46963 op1 = gen_lowpart (vmode, op1);
46964 break;
46966 case V32QImode:
46967 /* See if bytes move in pairs. If not, vpblendvb must be used. */
46968 for (i = 0; i < 32; i += 2)
46969 if (d->perm[i] + 1 != d->perm[i + 1])
46970 goto use_pblendvb;
46971 /* See if bytes move in quadruplets. If yes, vpblendd
46972 with immediate can be used. */
46973 for (i = 0; i < 32; i += 4)
46974 if (d->perm[i] + 2 != d->perm[i + 2])
46975 break;
46976 if (i < 32)
46978 /* See if bytes move the same in both lanes. If yes,
46979 vpblendw with immediate can be used. */
46980 for (i = 0; i < 16; i += 2)
46981 if (d->perm[i] + 16 != d->perm[i + 16])
46982 goto use_pblendvb;
46984 /* Use vpblendw. */
46985 for (i = 0; i < 16; ++i)
46986 mask |= (d->perm[i * 2] >= 32) << i;
46987 vmode = V16HImode;
46988 goto do_subreg;
46991 /* Use vpblendd. */
46992 for (i = 0; i < 8; ++i)
46993 mask |= (d->perm[i * 4] >= 32) << i;
46994 vmode = V8SImode;
46995 goto do_subreg;
46997 case V16HImode:
46998 /* See if words move in pairs. If yes, vpblendd can be used. */
46999 for (i = 0; i < 16; i += 2)
47000 if (d->perm[i] + 1 != d->perm[i + 1])
47001 break;
47002 if (i < 16)
47004 /* See if words move the same in both lanes. If not,
47005 vpblendvb must be used. */
47006 for (i = 0; i < 8; i++)
47007 if (d->perm[i] + 8 != d->perm[i + 8])
47009 /* Use vpblendvb. */
47010 for (i = 0; i < 32; ++i)
47011 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
47013 vmode = V32QImode;
47014 nelt = 32;
47015 target = gen_reg_rtx (vmode);
47016 op0 = gen_lowpart (vmode, op0);
47017 op1 = gen_lowpart (vmode, op1);
47018 goto finish_pblendvb;
47021 /* Use vpblendw. */
47022 for (i = 0; i < 16; ++i)
47023 mask |= (d->perm[i] >= 16) << i;
47024 break;
47027 /* Use vpblendd. */
47028 for (i = 0; i < 8; ++i)
47029 mask |= (d->perm[i * 2] >= 16) << i;
47030 vmode = V8SImode;
47031 goto do_subreg;
47033 case V4DImode:
47034 /* Use vpblendd. */
47035 for (i = 0; i < 4; ++i)
47036 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
47037 vmode = V8SImode;
47038 goto do_subreg;
47040 default:
47041 gcc_unreachable ();
47044 /* This matches five different patterns with the different modes. */
47045 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
47046 x = gen_rtx_SET (VOIDmode, target, x);
47047 emit_insn (x);
47048 if (target != d->target)
47049 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47051 return true;
47054 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47055 in terms of the variable form of vpermilps.
47057 Note that we will have already failed the immediate input vpermilps,
47058 which requires that the high and low part shuffle be identical; the
47059 variable form doesn't require that. */
47061 static bool
47062 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
47064 rtx rperm[8], vperm;
47065 unsigned i;
47067 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
47068 return false;
47070 /* We can only permute within the 128-bit lane. */
47071 for (i = 0; i < 8; ++i)
47073 unsigned e = d->perm[i];
47074 if (i < 4 ? e >= 4 : e < 4)
47075 return false;
47078 if (d->testing_p)
47079 return true;
47081 for (i = 0; i < 8; ++i)
47083 unsigned e = d->perm[i];
47085 /* Within each 128-bit lane, the elements of op0 are numbered
47086 from 0 and the elements of op1 are numbered from 4. */
47087 if (e >= 8 + 4)
47088 e -= 8;
47089 else if (e >= 4)
47090 e -= 4;
47092 rperm[i] = GEN_INT (e);
47095 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
47096 vperm = force_reg (V8SImode, vperm);
47097 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
47099 return true;
47102 /* Return true if permutation D can be performed as VMODE permutation
47103 instead. */
47105 static bool
47106 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
47108 unsigned int i, j, chunk;
47110 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
47111 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
47112 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
47113 return false;
47115 if (GET_MODE_NUNITS (vmode) >= d->nelt)
47116 return true;
47118 chunk = d->nelt / GET_MODE_NUNITS (vmode);
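/* Each group of CHUNK consecutive elements must come from a
   CHUNK-aligned source position and stay contiguous there for the
   wider-mode permutation to be equivalent.  */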
47119 for (i = 0; i < d->nelt; i += chunk)
47120 if (d->perm[i] & (chunk - 1))
47121 return false;
47122 else
47123 for (j = 1; j < chunk; ++j)
47124 if (d->perm[i] + j != d->perm[i + j])
47125 return false;
47127 return true;
47130 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47131 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
47133 static bool
47134 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
47136 unsigned i, nelt, eltsz, mask;
47137 unsigned char perm[64];
47138 machine_mode vmode = V16QImode;
47139 rtx rperm[64], vperm, target, op0, op1;
47141 nelt = d->nelt;
47143 if (!d->one_operand_p)
47145 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
47147 if (TARGET_AVX2
47148 && valid_perm_using_mode_p (V2TImode, d))
47150 if (d->testing_p)
47151 return true;
47153 /* Use vperm2i128 insn. The pattern uses
47154 V4DImode instead of V2TImode. */
47155 target = d->target;
47156 if (d->vmode != V4DImode)
47157 target = gen_reg_rtx (V4DImode);
47158 op0 = gen_lowpart (V4DImode, d->op0);
47159 op1 = gen_lowpart (V4DImode, d->op1);
47160 rperm[0]
47161 = GEN_INT ((d->perm[0] / (nelt / 2))
47162 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
47163 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47164 if (target != d->target)
47165 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47166 return true;
47168 return false;
47171 else
47173 if (GET_MODE_SIZE (d->vmode) == 16)
47175 if (!TARGET_SSSE3)
47176 return false;
47178 else if (GET_MODE_SIZE (d->vmode) == 32)
47180 if (!TARGET_AVX2)
47181 return false;
47183 /* V4DImode should be already handled through
47184 expand_vselect by vpermq instruction. */
47185 gcc_assert (d->vmode != V4DImode);
47187 vmode = V32QImode;
47188 if (d->vmode == V8SImode
47189 || d->vmode == V16HImode
47190 || d->vmode == V32QImode)
47192 /* First see if vpermq can be used for
47193 V8SImode/V16HImode/V32QImode. */
47194 if (valid_perm_using_mode_p (V4DImode, d))
47196 for (i = 0; i < 4; i++)
47197 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47198 if (d->testing_p)
47199 return true;
47200 target = gen_reg_rtx (V4DImode);
47201 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47202 perm, 4, false))
47204 emit_move_insn (d->target,
47205 gen_lowpart (d->vmode, target));
47206 return true;
47208 return false;
47211 /* Next see if vpermd can be used. */
47212 if (valid_perm_using_mode_p (V8SImode, d))
47213 vmode = V8SImode;
47215 /* Or if vpermps can be used. */
47216 else if (d->vmode == V8SFmode)
47217 vmode = V8SImode;
47219 if (vmode == V32QImode)
47221 /* vpshufb only works within 128-bit lanes; it is not
47222 possible to shuffle bytes in between the lanes. */
47223 for (i = 0; i < nelt; ++i)
47224 if ((d->perm[i] ^ i) & (nelt / 2))
47225 return false;
47228 else if (GET_MODE_SIZE (d->vmode) == 64)
47230 if (!TARGET_AVX512BW)
47231 return false;
47233 /* If vpermq didn't work, vpshufb won't work either. */
47234 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47235 return false;
47237 vmode = V64QImode;
47238 if (d->vmode == V16SImode
47239 || d->vmode == V32HImode
47240 || d->vmode == V64QImode)
47242 /* First see if vpermq can be used for
47243 V16SImode/V32HImode/V64QImode. */
47244 if (valid_perm_using_mode_p (V8DImode, d))
47246 for (i = 0; i < 8; i++)
47247 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47248 if (d->testing_p)
47249 return true;
47250 target = gen_reg_rtx (V8DImode);
47251 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47252 perm, 8, false))
47254 emit_move_insn (d->target,
47255 gen_lowpart (d->vmode, target));
47256 return true;
47258 return false;
47261 /* Next see if vpermd can be used. */
47262 if (valid_perm_using_mode_p (V16SImode, d))
47263 vmode = V16SImode;
47265 /* Or if vpermps can be used. */
47266 else if (d->vmode == V16SFmode)
47267 vmode = V16SImode;
47268 if (vmode == V64QImode)
47270 /* vpshufb only works within 128-bit lanes; it is not
47271 possible to shuffle bytes in between the lanes. */
47272 for (i = 0; i < nelt; ++i)
47273 if ((d->perm[i] ^ i) & (nelt / 4))
47274 return false;
47277 else
47278 return false;
47281 if (d->testing_p)
47282 return true;
47284 if (vmode == V8SImode)
47285 for (i = 0; i < 8; ++i)
47286 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47287 else if (vmode == V16SImode)
47288 for (i = 0; i < 16; ++i)
47289 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47290 else
47292 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47293 if (!d->one_operand_p)
47294 mask = 2 * nelt - 1;
47295 else if (vmode == V16QImode)
47296 mask = nelt - 1;
47297 else if (vmode == V64QImode)
47298 mask = nelt / 4 - 1;
47299 else
47300 mask = nelt / 2 - 1;
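/* pshufb/vpperm select individual bytes, so each element index is
   expanded into ELTSZ consecutive byte indices.  The mask above limits
   the index to what the insn can address: both operands for vpperm,
   the whole vector for pshufb, or a single 128-bit lane for the
   256/512-bit vpshufb.  */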
47302 for (i = 0; i < nelt; ++i)
47304 unsigned j, e = d->perm[i] & mask;
47305 for (j = 0; j < eltsz; ++j)
47306 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47310 vperm = gen_rtx_CONST_VECTOR (vmode,
47311 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47312 vperm = force_reg (vmode, vperm);
47314 target = d->target;
47315 if (d->vmode != vmode)
47316 target = gen_reg_rtx (vmode);
47317 op0 = gen_lowpart (vmode, d->op0);
47318 if (d->one_operand_p)
47320 if (vmode == V16QImode)
47321 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47322 else if (vmode == V32QImode)
47323 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47324 else if (vmode == V64QImode)
47325 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47326 else if (vmode == V8SFmode)
47327 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47328 else if (vmode == V8SImode)
47329 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47330 else if (vmode == V16SFmode)
47331 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47332 else if (vmode == V16SImode)
47333 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47334 else
47335 gcc_unreachable ();
47337 else
47339 op1 = gen_lowpart (vmode, d->op1);
47340 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47342 if (target != d->target)
47343 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47345 return true;
47348 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47349 in a single instruction. */
47351 static bool
47352 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47354 unsigned i, nelt = d->nelt;
47355 unsigned char perm2[MAX_VECT_LEN];
47357 /* Check plain VEC_SELECT first, because AVX has instructions that could
47358 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47359 input where SEL+CONCAT may not. */
47360 if (d->one_operand_p)
47362 int mask = nelt - 1;
47363 bool identity_perm = true;
47364 bool broadcast_perm = true;
47366 for (i = 0; i < nelt; i++)
47368 perm2[i] = d->perm[i] & mask;
47369 if (perm2[i] != i)
47370 identity_perm = false;
47371 if (perm2[i])
47372 broadcast_perm = false;
47375 if (identity_perm)
47377 if (!d->testing_p)
47378 emit_move_insn (d->target, d->op0);
47379 return true;
47381 else if (broadcast_perm && TARGET_AVX2)
47383 /* Use vpbroadcast{b,w,d}. */
47384 rtx (*gen) (rtx, rtx) = NULL;
47385 switch (d->vmode)
47387 case V64QImode:
47388 if (TARGET_AVX512BW)
47389 gen = gen_avx512bw_vec_dupv64qi_1;
47390 break;
47391 case V32QImode:
47392 gen = gen_avx2_pbroadcastv32qi_1;
47393 break;
47394 case V32HImode:
47395 if (TARGET_AVX512BW)
47396 gen = gen_avx512bw_vec_dupv32hi_1;
47397 break;
47398 case V16HImode:
47399 gen = gen_avx2_pbroadcastv16hi_1;
47400 break;
47401 case V16SImode:
47402 if (TARGET_AVX512F)
47403 gen = gen_avx512f_vec_dupv16si_1;
47404 break;
47405 case V8SImode:
47406 gen = gen_avx2_pbroadcastv8si_1;
47407 break;
47408 case V16QImode:
47409 gen = gen_avx2_pbroadcastv16qi;
47410 break;
47411 case V8HImode:
47412 gen = gen_avx2_pbroadcastv8hi;
47413 break;
47414 case V16SFmode:
47415 if (TARGET_AVX512F)
47416 gen = gen_avx512f_vec_dupv16sf_1;
47417 break;
47418 case V8SFmode:
47419 gen = gen_avx2_vec_dupv8sf_1;
47420 break;
47421 case V8DFmode:
47422 if (TARGET_AVX512F)
47423 gen = gen_avx512f_vec_dupv8df_1;
47424 break;
47425 case V8DImode:
47426 if (TARGET_AVX512F)
47427 gen = gen_avx512f_vec_dupv8di_1;
47428 break;
47429 /* For other modes prefer other shuffles this function creates. */
47430 default: break;
47432 if (gen != NULL)
47434 if (!d->testing_p)
47435 emit_insn (gen (d->target, d->op0));
47436 return true;
47440 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47441 return true;
47443 /* There are plenty of patterns in sse.md that are written for
47444 SEL+CONCAT and are not replicated for a single op. Perhaps
47445 that should be changed, to avoid the nastiness here. */
47447 /* Recognize interleave style patterns, which means incrementing
47448 every other permutation operand. */
47449 for (i = 0; i < nelt; i += 2)
47451 perm2[i] = d->perm[i] & mask;
47452 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47454 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47455 d->testing_p))
47456 return true;
47458 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47459 if (nelt >= 4)
47461 for (i = 0; i < nelt; i += 4)
47463 perm2[i + 0] = d->perm[i + 0] & mask;
47464 perm2[i + 1] = d->perm[i + 1] & mask;
47465 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47466 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47469 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47470 d->testing_p))
47471 return true;
47475 /* Finally, try the fully general two operand permute. */
47476 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47477 d->testing_p))
47478 return true;
47480 /* Recognize interleave style patterns with reversed operands. */
47481 if (!d->one_operand_p)
47483 for (i = 0; i < nelt; ++i)
47485 unsigned e = d->perm[i];
47486 if (e >= nelt)
47487 e -= nelt;
47488 else
47489 e += nelt;
47490 perm2[i] = e;
47493 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47494 d->testing_p))
47495 return true;
47498 /* Try the SSE4.1 blend variable merge instructions. */
47499 if (expand_vec_perm_blend (d))
47500 return true;
47502 /* Try one of the AVX vpermil variable permutations. */
47503 if (expand_vec_perm_vpermil (d))
47504 return true;
47506 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47507 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47508 if (expand_vec_perm_pshufb (d))
47509 return true;
47511 /* Try the AVX2 vpalignr instruction. */
47512 if (expand_vec_perm_palignr (d, true))
47513 return true;
47515 /* Try the AVX512F vpermi2 instructions. */
47516 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47517 return true;
47519 return false;
47522 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47523 in terms of a pair of pshuflw + pshufhw instructions. */
47525 static bool
47526 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47528 unsigned char perm2[MAX_VECT_LEN];
47529 unsigned i;
47530 bool ok;
47532 if (d->vmode != V8HImode || !d->one_operand_p)
47533 return false;
47535 /* The two permutations only operate in 64-bit lanes. */
47536 for (i = 0; i < 4; ++i)
47537 if (d->perm[i] >= 4)
47538 return false;
47539 for (i = 4; i < 8; ++i)
47540 if (d->perm[i] < 4)
47541 return false;
47543 if (d->testing_p)
47544 return true;
47546 /* Emit the pshuflw. */
47547 memcpy (perm2, d->perm, 4);
47548 for (i = 4; i < 8; ++i)
47549 perm2[i] = i;
47550 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47551 gcc_assert (ok);
47553 /* Emit the pshufhw. */
47554 memcpy (perm2 + 4, d->perm + 4, 4);
47555 for (i = 0; i < 4; ++i)
47556 perm2[i] = i;
47557 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47558 gcc_assert (ok);
47560 return true;
47563 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47564 the permutation using the SSSE3 palignr instruction. This succeeds
47565 when all of the elements in PERM fit within one vector and we merely
47566 need to shift them down so that a single vector permutation has a
47567 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
47568 the vpalignr instruction by itself can perform the requested permutation. */
47570 static bool
47571 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47573 unsigned i, nelt = d->nelt;
47574 unsigned min, max, minswap, maxswap;
47575 bool in_order, ok, swap = false;
47576 rtx shift, target;
47577 struct expand_vec_perm_d dcopy;
47579 /* Even with AVX, palignr only operates on 128-bit vectors;
47580 with AVX2, palignr operates on both 128-bit lanes. */
47581 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47582 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47583 return false;
47585 min = 2 * nelt;
47586 max = 0;
47587 minswap = 2 * nelt;
47588 maxswap = 0;
47589 for (i = 0; i < nelt; ++i)
47591 unsigned e = d->perm[i];
47592 unsigned eswap = d->perm[i] ^ nelt;
47593 if (GET_MODE_SIZE (d->vmode) == 32)
47595 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47596 eswap = e ^ (nelt / 2);
47598 if (e < min)
47599 min = e;
47600 if (e > max)
47601 max = e;
47602 if (eswap < minswap)
47603 minswap = eswap;
47604 if (eswap > maxswap)
47605 maxswap = eswap;
47607 if (min == 0
47608 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47610 if (d->one_operand_p
47611 || minswap == 0
47612 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47613 ? nelt / 2 : nelt))
47614 return false;
47615 swap = true;
47616 min = minswap;
47617 max = maxswap;
47620 /* Given that we have SSSE3, we know we'll be able to implement the
47621 single operand permutation after the palignr with pshufb for
47622 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47623 first. */
47624 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47625 return true;
47627 dcopy = *d;
47628 if (swap)
47630 dcopy.op0 = d->op1;
47631 dcopy.op1 = d->op0;
47632 for (i = 0; i < nelt; ++i)
47633 dcopy.perm[i] ^= nelt;
47636 in_order = true;
47637 for (i = 0; i < nelt; ++i)
47639 unsigned e = dcopy.perm[i];
47640 if (GET_MODE_SIZE (d->vmode) == 32
47641 && e >= nelt
47642 && (e & (nelt / 2 - 1)) < min)
47643 e = e - min - (nelt / 2);
47644 else
47645 e = e - min;
47646 if (e != i)
47647 in_order = false;
47648 dcopy.perm[i] = e;
47650 dcopy.one_operand_p = true;
47652 if (single_insn_only_p && !in_order)
47653 return false;
47655 /* For AVX2, test whether we can permute the result in one instruction. */
47656 if (d->testing_p)
47658 if (in_order)
47659 return true;
47660 dcopy.op1 = dcopy.op0;
47661 return expand_vec_perm_1 (&dcopy);
47664 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47665 if (GET_MODE_SIZE (d->vmode) == 16)
47667 target = gen_reg_rtx (TImode);
47668 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47669 gen_lowpart (TImode, dcopy.op0), shift));
47671 else
47673 target = gen_reg_rtx (V2TImode);
47674 emit_insn (gen_avx2_palignrv2ti (target,
47675 gen_lowpart (V2TImode, dcopy.op1),
47676 gen_lowpart (V2TImode, dcopy.op0),
47677 shift));
47680 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47682 /* Test for the degenerate case where the alignment by itself
47683 produces the desired permutation. */
47684 if (in_order)
47686 emit_move_insn (d->target, dcopy.op0);
47687 return true;
47690 ok = expand_vec_perm_1 (&dcopy);
47691 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47693 return ok;
47696 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47697 the permutation using the SSE4_1 pblendv instruction. Potentially
47698 reduces the permutation from 2 pshufb insns plus an or to 1 pshufb and a pblendv. */
47700 static bool
47701 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47703 unsigned i, which, nelt = d->nelt;
47704 struct expand_vec_perm_d dcopy, dcopy1;
47705 machine_mode vmode = d->vmode;
47706 bool ok;
47708 /* Use the same checks as in expand_vec_perm_blend. */
47709 if (d->one_operand_p)
47710 return false;
47711 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47713 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47715 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47717 else
47718 return false;
47720 /* Figure out where the permutation elements do not stay in their
47721 respective lanes. */
47722 for (i = 0, which = 0; i < nelt; ++i)
47724 unsigned e = d->perm[i];
47725 if (e != i)
47726 which |= (e < nelt ? 1 : 2);
47728 /* We can pblend the part where elements do not stay in their
47729 respective lanes only when these elements are all in one
47730 half of the permutation.
47731 {0 1 8 3 4 5 9 7} is ok, as 8 and 9 are not in their respective
47732 lanes, but both 8 and 9 >= 8;
47733 {0 1 8 3 4 5 2 7} is not ok, as 2 and 8 are not in their
47734 respective lanes and 8 >= 8, but 2 is not. */
47735 if (which != 1 && which != 2)
47736 return false;
47737 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47738 return true;
47740 /* First we apply a one-operand permutation to the part where
47741 the elements do not stay in their respective lanes. */
47742 dcopy = *d;
47743 if (which == 2)
47744 dcopy.op0 = dcopy.op1 = d->op1;
47745 else
47746 dcopy.op0 = dcopy.op1 = d->op0;
47747 if (!d->testing_p)
47748 dcopy.target = gen_reg_rtx (vmode);
47749 dcopy.one_operand_p = true;
47751 for (i = 0; i < nelt; ++i)
47752 dcopy.perm[i] = d->perm[i] & (nelt - 1);
47754 ok = expand_vec_perm_1 (&dcopy);
47755 if (GET_MODE_SIZE (vmode) != 16 && !ok)
47756 return false;
47757 else
47758 gcc_assert (ok);
47759 if (d->testing_p)
47760 return true;
47762 /* Next we put permuted elements into their positions. */
47763 dcopy1 = *d;
47764 if (which == 2)
47765 dcopy1.op1 = dcopy.target;
47766 else
47767 dcopy1.op0 = dcopy.target;
47769 for (i = 0; i < nelt; ++i)
47770 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47772 ok = expand_vec_perm_blend (&dcopy1);
47773 gcc_assert (ok);
47775 return true;
47778 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47780 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47781 a two vector permutation into a single vector permutation by using
47782 an interleave operation to merge the vectors. */
47784 static bool
47785 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47787 struct expand_vec_perm_d dremap, dfinal;
47788 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47789 unsigned HOST_WIDE_INT contents;
47790 unsigned char remap[2 * MAX_VECT_LEN];
47791 rtx_insn *seq;
47792 bool ok, same_halves = false;
47794 if (GET_MODE_SIZE (d->vmode) == 16)
47796 if (d->one_operand_p)
47797 return false;
47799 else if (GET_MODE_SIZE (d->vmode) == 32)
47801 if (!TARGET_AVX)
47802 return false;
47803 /* For 32-byte modes allow even d->one_operand_p.
47804 The lack of cross-lane shuffling in some instructions
47805 might prevent a single insn shuffle. */
47806 dfinal = *d;
47807 dfinal.testing_p = true;
47808 /* If expand_vec_perm_interleave3 can expand this into
47809 a 3 insn sequence, give up and let it be expanded as
47810 a 3 insn sequence. While that is one insn longer,
47811 it doesn't need a memory operand, and in the common
47812 case where the interleave low and interleave high
47813 permutations of the same operands are adjacent, the
47814 pair needs only 4 insns after CSE. */
47815 if (expand_vec_perm_interleave3 (&dfinal))
47816 return false;
47818 else
47819 return false;
47821 /* Examine from whence the elements come. */
47822 contents = 0;
47823 for (i = 0; i < nelt; ++i)
47824 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
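/* CONTENTS now has bit J set iff some result element is taken from
   input element J, with elements of the second operand numbered from
   NELT upward.  */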
47826 memset (remap, 0xff, sizeof (remap));
47827 dremap = *d;
47829 if (GET_MODE_SIZE (d->vmode) == 16)
47831 unsigned HOST_WIDE_INT h1, h2, h3, h4;
47833 /* Split the two input vectors into 4 halves. */
47834 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
47835 h2 = h1 << nelt2;
47836 h3 = h2 << nelt2;
47837 h4 = h3 << nelt2;
47839 /* If all the elements come from the low halves, use interleave low;
47840 similarly for interleave high. If the elements are from mis-matched
47841 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
47842 if ((contents & (h1 | h3)) == contents)
47844 /* punpckl* */
47845 for (i = 0; i < nelt2; ++i)
47847 remap[i] = i * 2;
47848 remap[i + nelt] = i * 2 + 1;
47849 dremap.perm[i * 2] = i;
47850 dremap.perm[i * 2 + 1] = i + nelt;
47852 if (!TARGET_SSE2 && d->vmode == V4SImode)
47853 dremap.vmode = V4SFmode;
47855 else if ((contents & (h2 | h4)) == contents)
47857 /* punpckh* */
47858 for (i = 0; i < nelt2; ++i)
47860 remap[i + nelt2] = i * 2;
47861 remap[i + nelt + nelt2] = i * 2 + 1;
47862 dremap.perm[i * 2] = i + nelt2;
47863 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47865 if (!TARGET_SSE2 && d->vmode == V4SImode)
47866 dremap.vmode = V4SFmode;
47868 else if ((contents & (h1 | h4)) == contents)
47870 /* shufps */
47871 for (i = 0; i < nelt2; ++i)
47873 remap[i] = i;
47874 remap[i + nelt + nelt2] = i + nelt2;
47875 dremap.perm[i] = i;
47876 dremap.perm[i + nelt2] = i + nelt + nelt2;
47878 if (nelt != 4)
47880 /* shufpd */
47881 dremap.vmode = V2DImode;
47882 dremap.nelt = 2;
47883 dremap.perm[0] = 0;
47884 dremap.perm[1] = 3;
47887 else if ((contents & (h2 | h3)) == contents)
47889 /* shufps */
47890 for (i = 0; i < nelt2; ++i)
47892 remap[i + nelt2] = i;
47893 remap[i + nelt] = i + nelt2;
47894 dremap.perm[i] = i + nelt2;
47895 dremap.perm[i + nelt2] = i + nelt;
47897 if (nelt != 4)
47899 /* shufpd */
47900 dremap.vmode = V2DImode;
47901 dremap.nelt = 2;
47902 dremap.perm[0] = 1;
47903 dremap.perm[1] = 2;
47906 else
47907 return false;
47909 else
47911 unsigned int nelt4 = nelt / 4, nzcnt = 0;
47912 unsigned HOST_WIDE_INT q[8];
47913 unsigned int nonzero_halves[4];
47915 /* Split the two input vectors into 8 quarters. */
47916 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
47917 for (i = 1; i < 8; ++i)
47918 q[i] = q[0] << (nelt4 * i);
47919 for (i = 0; i < 4; ++i)
47920 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47922 nonzero_halves[nzcnt] = i;
47923 ++nzcnt;
47926 if (nzcnt == 1)
47928 gcc_assert (d->one_operand_p);
47929 nonzero_halves[1] = nonzero_halves[0];
47930 same_halves = true;
47932 else if (d->one_operand_p)
47934 gcc_assert (nonzero_halves[0] == 0);
47935 gcc_assert (nonzero_halves[1] == 1);
47938 if (nzcnt <= 2)
47940 if (d->perm[0] / nelt2 == nonzero_halves[1])
47942 /* Attempt to increase the likelihood that dfinal
47943 shuffle will be intra-lane. */
47944 char tmph = nonzero_halves[0];
47945 nonzero_halves[0] = nonzero_halves[1];
47946 nonzero_halves[1] = tmph;
47949 /* vperm2f128 or vperm2i128. */
47950 for (i = 0; i < nelt2; ++i)
47952 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47953 remap[i + nonzero_halves[0] * nelt2] = i;
47954 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
47955 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
47958 if (d->vmode != V8SFmode
47959 && d->vmode != V4DFmode
47960 && d->vmode != V8SImode)
47962 dremap.vmode = V8SImode;
47963 dremap.nelt = 8;
47964 for (i = 0; i < 4; ++i)
47966 dremap.perm[i] = i + nonzero_halves[0] * 4;
47967 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
47971 else if (d->one_operand_p)
47972 return false;
47973 else if (TARGET_AVX2
47974 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
47976 /* vpunpckl* */
47977 for (i = 0; i < nelt4; ++i)
47979 remap[i] = i * 2;
47980 remap[i + nelt] = i * 2 + 1;
47981 remap[i + nelt2] = i * 2 + nelt2;
47982 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
47983 dremap.perm[i * 2] = i;
47984 dremap.perm[i * 2 + 1] = i + nelt;
47985 dremap.perm[i * 2 + nelt2] = i + nelt2;
47986 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
47989 else if (TARGET_AVX2
47990 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
47992 /* vpunpckh* */
47993 for (i = 0; i < nelt4; ++i)
47995 remap[i + nelt4] = i * 2;
47996 remap[i + nelt + nelt4] = i * 2 + 1;
47997 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
47998 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
47999 dremap.perm[i * 2] = i + nelt4;
48000 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
48001 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
48002 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
48005 else
48006 return false;
48009 /* Use the remapping array set up above to move the elements from their
48010 swizzled locations into their final destinations. */
48011 dfinal = *d;
48012 for (i = 0; i < nelt; ++i)
48014 unsigned e = remap[d->perm[i]];
48015 gcc_assert (e < nelt);
48016 /* If same_halves is true, both halves of the remapped vector are the
48017 same. Avoid cross-lane accesses if possible. */
48018 if (same_halves && i >= nelt2)
48020 gcc_assert (e < nelt2);
48021 dfinal.perm[i] = e + nelt2;
48023 else
48024 dfinal.perm[i] = e;
48026 if (!d->testing_p)
48028 dremap.target = gen_reg_rtx (dremap.vmode);
48029 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48031 dfinal.op1 = dfinal.op0;
48032 dfinal.one_operand_p = true;
48034 /* Test if the final remap can be done with a single insn. For V4SFmode or
48035 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
48036 start_sequence ();
48037 ok = expand_vec_perm_1 (&dfinal);
48038 seq = get_insns ();
48039 end_sequence ();
48041 if (!ok)
48042 return false;
48044 if (d->testing_p)
48045 return true;
48047 if (dremap.vmode != dfinal.vmode)
48049 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
48050 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
48053 ok = expand_vec_perm_1 (&dremap);
48054 gcc_assert (ok);
48056 emit_insn (seq);
48057 return true;
48060 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48061 a single vector cross-lane permutation into vpermq followed
48062 by any of the single insn permutations. */
48064 static bool
48065 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
48067 struct expand_vec_perm_d dremap, dfinal;
48068 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
48069 unsigned contents[2];
48070 bool ok;
48072 if (!(TARGET_AVX2
48073 && (d->vmode == V32QImode || d->vmode == V16HImode)
48074 && d->one_operand_p))
48075 return false;
48077 contents[0] = 0;
48078 contents[1] = 0;
48079 for (i = 0; i < nelt2; ++i)
48081 contents[0] |= 1u << (d->perm[i] / nelt4);
48082 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
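/* contents[H] has bit Q set iff half H of the result uses some element
   from 64-bit quarter Q of the input.  A vpermq destination lane can
   hold only two quarters, so give up below if either half needs more
   than two.  */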
48085 for (i = 0; i < 2; ++i)
48087 unsigned int cnt = 0;
48088 for (j = 0; j < 4; ++j)
48089 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
48090 return false;
48093 if (d->testing_p)
48094 return true;
48096 dremap = *d;
48097 dremap.vmode = V4DImode;
48098 dremap.nelt = 4;
48099 dremap.target = gen_reg_rtx (V4DImode);
48100 dremap.op0 = gen_lowpart (V4DImode, d->op0);
48101 dremap.op1 = dremap.op0;
48102 dremap.one_operand_p = true;
48103 for (i = 0; i < 2; ++i)
48105 unsigned int cnt = 0;
48106 for (j = 0; j < 4; ++j)
48107 if ((contents[i] & (1u << j)) != 0)
48108 dremap.perm[2 * i + cnt++] = j;
48109 for (; cnt < 2; ++cnt)
48110 dremap.perm[2 * i + cnt] = 0;
48113 dfinal = *d;
48114 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48115 dfinal.op1 = dfinal.op0;
48116 dfinal.one_operand_p = true;
48117 for (i = 0, j = 0; i < nelt; ++i)
48119 if (i == nelt2)
48120 j = 2;
48121 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
48122 if ((d->perm[i] / nelt4) == dremap.perm[j])
48124 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
48125 dfinal.perm[i] |= nelt4;
48126 else
48127 gcc_unreachable ();
48130 ok = expand_vec_perm_1 (&dremap);
48131 gcc_assert (ok);
48133 ok = expand_vec_perm_1 (&dfinal);
48134 gcc_assert (ok);
48136 return true;
48139 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
48140 a vector permutation using two instructions, vperm2f128 resp.
48141 vperm2i128 followed by any single in-lane permutation. */
48143 static bool
48144 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
48146 struct expand_vec_perm_d dfirst, dsecond;
48147 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
48148 bool ok;
48150 if (!TARGET_AVX
48151 || GET_MODE_SIZE (d->vmode) != 32
48152 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
48153 return false;
48155 dsecond = *d;
48156 dsecond.one_operand_p = false;
48157 dsecond.testing_p = true;
48159 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
48160 immediate. For perm < 16 the second permutation uses
48161 d->op0 as first operand, for perm >= 16 it uses d->op1
48162 as first operand. The second operand is the result of
48163 vperm2[fi]128. */
48164 for (perm = 0; perm < 32; perm++)
48166 /* Ignore permutations which do not move anything cross-lane. */
48167 if (perm < 16)
48169 /* The second shuffle for e.g. V4DFmode has
48170 0123 and ABCD operands.
48171 Ignore AB23, as 23 is already in the second lane
48172 of the first operand. */
48173 if ((perm & 0xc) == (1 << 2)) continue;
48174 /* And 01CD, as 01 is in the first lane of the first
48175 operand. */
48176 if ((perm & 3) == 0) continue;
48177 /* And 4567, as then the vperm2[fi]128 doesn't change
48178 anything on the original 4567 second operand. */
48179 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48181 else
48183 /* The second shuffle for e.g. V4DFmode has
48184 4567 and ABCD operands.
48185 Ignore AB67, as 67 is already in the second lane
48186 of the first operand. */
48187 if ((perm & 0xc) == (3 << 2)) continue;
48188 /* And 45CD, as 45 is in the first lane of the first
48189 operand. */
48190 if ((perm & 3) == 2) continue;
48191 /* And 0123, as then the vperm2[fi]128 doesn't change
48192 anything on the original 0123 first operand. */
48193 if ((perm & 0xf) == (1 << 2)) continue;
48196 for (i = 0; i < nelt; i++)
48198 j = d->perm[i] / nelt2;
48199 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48200 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48201 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48202 dsecond.perm[i] = d->perm[i] & (nelt - 1);
48203 else
48204 break;
48207 if (i == nelt)
48209 start_sequence ();
48210 ok = expand_vec_perm_1 (&dsecond);
48211 end_sequence ();
48213 else
48214 ok = false;
48216 if (ok)
48218 if (d->testing_p)
48219 return true;
48221 /* Found a usable second shuffle. dfirst will be
48222 vperm2f128 on d->op0 and d->op1. */
48223 dsecond.testing_p = false;
48224 dfirst = *d;
48225 dfirst.target = gen_reg_rtx (d->vmode);
48226 for (i = 0; i < nelt; i++)
48227 dfirst.perm[i] = (i & (nelt2 - 1))
48228 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48230 canonicalize_perm (&dfirst);
48231 ok = expand_vec_perm_1 (&dfirst);
48232 gcc_assert (ok);
48234 /* And dsecond is some single insn shuffle, taking
48235 d->op0 and result of vperm2f128 (if perm < 16) or
48236 d->op1 and result of vperm2f128 (otherwise). */
48237 if (perm >= 16)
48238 dsecond.op0 = dsecond.op1;
48239 dsecond.op1 = dfirst.target;
48241 ok = expand_vec_perm_1 (&dsecond);
48242 gcc_assert (ok);
48244 return true;
48247 /* For one operand, the only useful vperm2f128 permutation is 0x01
48248 aka lanes swap. */
48249 if (d->one_operand_p)
48250 return false;
48253 return false;
48256 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48257 a two vector permutation using 2 intra-lane interleave insns
48258 and cross-lane shuffle for 32-byte vectors. */
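/* For illustration, with V8SImode the selectors accepted below are
   { 0 8 1 9 2 10 3 11 } (perm[0] == 0) and { 4 12 5 13 6 14 7 15 }
   (perm[0] == nelt / 2), i.e. the interleave-low and interleave-high
   patterns. */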
48260 static bool
48261 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48263 unsigned i, nelt;
48264 rtx (*gen) (rtx, rtx, rtx);
48266 if (d->one_operand_p)
48267 return false;
48268 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48270 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
48272 else
48273 return false;
48275 nelt = d->nelt;
48276 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48277 return false;
48278 for (i = 0; i < nelt; i += 2)
48279 if (d->perm[i] != d->perm[0] + i / 2
48280 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
48281 return false;
48283 if (d->testing_p)
48284 return true;
48286 switch (d->vmode)
48288 case V32QImode:
48289 if (d->perm[0])
48290 gen = gen_vec_interleave_highv32qi;
48291 else
48292 gen = gen_vec_interleave_lowv32qi;
48293 break;
48294 case V16HImode:
48295 if (d->perm[0])
48296 gen = gen_vec_interleave_highv16hi;
48297 else
48298 gen = gen_vec_interleave_lowv16hi;
48299 break;
48300 case V8SImode:
48301 if (d->perm[0])
48302 gen = gen_vec_interleave_highv8si;
48303 else
48304 gen = gen_vec_interleave_lowv8si;
48305 break;
48306 case V4DImode:
48307 if (d->perm[0])
48308 gen = gen_vec_interleave_highv4di;
48309 else
48310 gen = gen_vec_interleave_lowv4di;
48311 break;
48312 case V8SFmode:
48313 if (d->perm[0])
48314 gen = gen_vec_interleave_highv8sf;
48315 else
48316 gen = gen_vec_interleave_lowv8sf;
48317 break;
48318 case V4DFmode:
48319 if (d->perm[0])
48320 gen = gen_vec_interleave_highv4df;
48321 else
48322 gen = gen_vec_interleave_lowv4df;
48323 break;
48324 default:
48325 gcc_unreachable ();
48328 emit_insn (gen (d->target, d->op0, d->op1));
48329 return true;
48332 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
48333 a single vector permutation using a single intra-lane vector
48334 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48335 the non-swapped and swapped vectors together. */
48337 static bool
48338 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48340 struct expand_vec_perm_d dfirst, dsecond;
48341 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48342 rtx_insn *seq;
48343 bool ok;
48344 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48346 if (!TARGET_AVX
48347 || TARGET_AVX2
48348 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48349 || !d->one_operand_p)
48350 return false;
48352 dfirst = *d;
48353 for (i = 0; i < nelt; i++)
48354 dfirst.perm[i] = 0xff;
48355 for (i = 0, msk = 0; i < nelt; i++)
48357 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48358 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48359 return false;
48360 dfirst.perm[j] = d->perm[i];
48361 if (j != i)
48362 msk |= (1 << i);
48364 for (i = 0; i < nelt; i++)
48365 if (dfirst.perm[i] == 0xff)
48366 dfirst.perm[i] = i;
48368 if (!d->testing_p)
48369 dfirst.target = gen_reg_rtx (dfirst.vmode);
48371 start_sequence ();
48372 ok = expand_vec_perm_1 (&dfirst);
48373 seq = get_insns ();
48374 end_sequence ();
48376 if (!ok)
48377 return false;
48379 if (d->testing_p)
48380 return true;
48382 emit_insn (seq);
48384 dsecond = *d;
48385 dsecond.op0 = dfirst.target;
48386 dsecond.op1 = dfirst.target;
48387 dsecond.one_operand_p = true;
48388 dsecond.target = gen_reg_rtx (dsecond.vmode);
48389 for (i = 0; i < nelt; i++)
48390 dsecond.perm[i] = i ^ nelt2;
48392 ok = expand_vec_perm_1 (&dsecond);
48393 gcc_assert (ok);
48395 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48396 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48397 return true;
48400 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
48401 permutation using two vperm2f128, followed by a vshufpd insn blending
48402 the two vectors together. */
48404 static bool
48405 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48407 struct expand_vec_perm_d dfirst, dsecond, dthird;
48408 bool ok;
48410 if (!TARGET_AVX || (d->vmode != V4DFmode))
48411 return false;
48413 if (d->testing_p)
48414 return true;
48416 dfirst = *d;
48417 dsecond = *d;
48418 dthird = *d;
48420 dfirst.perm[0] = (d->perm[0] & ~1);
48421 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48422 dfirst.perm[2] = (d->perm[2] & ~1);
48423 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48424 dsecond.perm[0] = (d->perm[1] & ~1);
48425 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48426 dsecond.perm[2] = (d->perm[3] & ~1);
48427 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48428 dthird.perm[0] = (d->perm[0] % 2);
48429 dthird.perm[1] = (d->perm[1] % 2) + 4;
48430 dthird.perm[2] = (d->perm[2] % 2) + 2;
48431 dthird.perm[3] = (d->perm[3] % 2) + 6;
48433 dfirst.target = gen_reg_rtx (dfirst.vmode);
48434 dsecond.target = gen_reg_rtx (dsecond.vmode);
48435 dthird.op0 = dfirst.target;
48436 dthird.op1 = dsecond.target;
48437 dthird.one_operand_p = false;
48439 canonicalize_perm (&dfirst);
48440 canonicalize_perm (&dsecond);
48442 ok = expand_vec_perm_1 (&dfirst)
48443 && expand_vec_perm_1 (&dsecond)
48444 && expand_vec_perm_1 (&dthird);
48446 gcc_assert (ok);
48448 return true;
48451 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48452 permutation with two pshufb insns and an ior. We should have already
48453 failed all two instruction sequences. */
48455 static bool
48456 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48458 rtx rperm[2][16], vperm, l, h, op, m128;
48459 unsigned int i, nelt, eltsz;
48461 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48462 return false;
48463 gcc_assert (!d->one_operand_p);
48465 if (d->testing_p)
48466 return true;
48468 nelt = d->nelt;
48469 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48471 /* Generate two permutation masks. If the required element is within
48472 the given vector it is shuffled into the proper lane. If the required
48473 element is in the other vector, force a zero into the lane by setting
48474 bit 7 in the permutation mask. */
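/* E.g. for V16QImode with d->perm[0] == 20 (an element of op1): byte 0
   of the mask for op0 becomes -128 (forcing zero) while byte 0 of the
   mask for op1 becomes 4, so the second pshufb places element 20 of the
   concatenated input in byte 0 and the ior keeps it. */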
48475 m128 = GEN_INT (-128);
48476 for (i = 0; i < nelt; ++i)
48478 unsigned j, e = d->perm[i];
48479 unsigned which = (e >= nelt);
48480 if (e >= nelt)
48481 e -= nelt;
48483 for (j = 0; j < eltsz; ++j)
48485 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48486 rperm[1-which][i*eltsz + j] = m128;
48490 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48491 vperm = force_reg (V16QImode, vperm);
48493 l = gen_reg_rtx (V16QImode);
48494 op = gen_lowpart (V16QImode, d->op0);
48495 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48497 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48498 vperm = force_reg (V16QImode, vperm);
48500 h = gen_reg_rtx (V16QImode);
48501 op = gen_lowpart (V16QImode, d->op1);
48502 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48504 op = d->target;
48505 if (d->vmode != V16QImode)
48506 op = gen_reg_rtx (V16QImode);
48507 emit_insn (gen_iorv16qi3 (op, l, h));
48508 if (op != d->target)
48509 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48511 return true;
48514 /* Implement an arbitrary permutation of one V32QImode or V16HImode operand
48515 with two vpshufb insns, vpermq and vpor. We should have already failed
48516 all two or three instruction sequences. */
48518 static bool
48519 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48521 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48522 unsigned int i, nelt, eltsz;
48524 if (!TARGET_AVX2
48525 || !d->one_operand_p
48526 || (d->vmode != V32QImode && d->vmode != V16HImode))
48527 return false;
48529 if (d->testing_p)
48530 return true;
48532 nelt = d->nelt;
48533 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48535 /* Generate two permutation masks. If the required element is within
48536 the same lane, it is shuffled in. If the required element is from the
48537 other lane, force a zero by setting bit 7 in the permutation mask.
48538 The other mask has a non-negative element wherever an element
48539 is requested from the other lane, but also moved to the other lane,
48540 so that the result of vpshufb can have its two V2TImode halves
48541 swapped. */
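/* Worked example for V32QImode: if d->perm[0] == 20, the in-lane mask
   (rperm[0]) gets -128 at byte 0 while the cross-lane mask (rperm[1])
   places in-lane index 4 at byte 16; the vpshufb on op0 then leaves
   byte 20 of op0 at byte 16 of h, and the subsequent lane swap moves it
   to byte 0 of hp. */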
48542 m128 = GEN_INT (-128);
48543 for (i = 0; i < nelt; ++i)
48545 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48546 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48548 for (j = 0; j < eltsz; ++j)
48550 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48551 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48555 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48556 vperm = force_reg (V32QImode, vperm);
48558 h = gen_reg_rtx (V32QImode);
48559 op = gen_lowpart (V32QImode, d->op0);
48560 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48562 /* Swap the 128-bit lanes of h into hp. */
48563 hp = gen_reg_rtx (V4DImode);
48564 op = gen_lowpart (V4DImode, h);
48565 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48566 const1_rtx));
48568 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48569 vperm = force_reg (V32QImode, vperm);
48571 l = gen_reg_rtx (V32QImode);
48572 op = gen_lowpart (V32QImode, d->op0);
48573 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48575 op = d->target;
48576 if (d->vmode != V32QImode)
48577 op = gen_reg_rtx (V32QImode);
48578 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48579 if (op != d->target)
48580 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48582 return true;
48585 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48586 and extract-odd permutations of two V32QImode or V16HImode operands
48587 with two vpshufb insns, vpor and vpermq. We should have already
48588 failed all two or three instruction sequences. */
48590 static bool
48591 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48593 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48594 unsigned int i, nelt, eltsz;
48596 if (!TARGET_AVX2
48597 || d->one_operand_p
48598 || (d->vmode != V32QImode && d->vmode != V16HImode))
48599 return false;
48601 for (i = 0; i < d->nelt; ++i)
48602 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48603 return false;
48605 if (d->testing_p)
48606 return true;
48608 nelt = d->nelt;
48609 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48611 /* Generate two permutation masks. In the first permutation mask
48612 the first quarter will contain indexes for the first half
48613 of the op0, the second quarter will contain bit 7 set, third quarter
48614 will contain indexes for the second half of the op0 and the
48615 last quarter bit 7 set. In the second permutation mask
48616 the first quarter will contain bit 7 set, the second quarter
48617 indexes for the first half of the op1, the third quarter bit 7 set
48618 and last quarter indexes for the second half of the op1.
48619 I.e. the first mask e.g. for V32QImode extract even will be:
48620 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48621 (all values masked with 0xf except for -128) and second mask
48622 for extract even will be
48623 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48624 m128 = GEN_INT (-128);
48625 for (i = 0; i < nelt; ++i)
48627 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48628 unsigned which = d->perm[i] >= nelt;
48629 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48631 for (j = 0; j < eltsz; ++j)
48633 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48634 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48638 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48639 vperm = force_reg (V32QImode, vperm);
48641 l = gen_reg_rtx (V32QImode);
48642 op = gen_lowpart (V32QImode, d->op0);
48643 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48645 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48646 vperm = force_reg (V32QImode, vperm);
48648 h = gen_reg_rtx (V32QImode);
48649 op = gen_lowpart (V32QImode, d->op1);
48650 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48652 ior = gen_reg_rtx (V32QImode);
48653 emit_insn (gen_iorv32qi3 (ior, l, h));
48655 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48656 op = gen_reg_rtx (V4DImode);
48657 ior = gen_lowpart (V4DImode, ior);
48658 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48659 const1_rtx, GEN_INT (3)));
48660 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48662 return true;
48665 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48666 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48667 with two "and" and "pack" or two "shift" and "pack" insns. We should
48668 have already failed all two instruction sequences. */
48670 static bool
48671 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48673 rtx op, dop0, dop1, t, rperm[16];
48674 unsigned i, odd, c, s, nelt = d->nelt;
48675 bool end_perm = false;
48676 machine_mode half_mode;
48677 rtx (*gen_and) (rtx, rtx, rtx);
48678 rtx (*gen_pack) (rtx, rtx, rtx);
48679 rtx (*gen_shift) (rtx, rtx, rtx);
48681 if (d->one_operand_p)
48682 return false;
48684 switch (d->vmode)
48686 case V8HImode:
48687 /* Required for "pack". */
48688 if (!TARGET_SSE4_1)
48689 return false;
48690 c = 0xffff;
48691 s = 16;
48692 half_mode = V4SImode;
48693 gen_and = gen_andv4si3;
48694 gen_pack = gen_sse4_1_packusdw;
48695 gen_shift = gen_lshrv4si3;
48696 break;
48697 case V16QImode:
48698 /* No check as all instructions are SSE2. */
48699 c = 0xff;
48700 s = 8;
48701 half_mode = V8HImode;
48702 gen_and = gen_andv8hi3;
48703 gen_pack = gen_sse2_packuswb;
48704 gen_shift = gen_lshrv8hi3;
48705 break;
48706 case V16HImode:
48707 if (!TARGET_AVX2)
48708 return false;
48709 c = 0xffff;
48710 s = 16;
48711 half_mode = V8SImode;
48712 gen_and = gen_andv8si3;
48713 gen_pack = gen_avx2_packusdw;
48714 gen_shift = gen_lshrv8si3;
48715 end_perm = true;
48716 break;
48717 case V32QImode:
48718 if (!TARGET_AVX2)
48719 return false;
48720 c = 0xff;
48721 s = 8;
48722 half_mode = V16HImode;
48723 gen_and = gen_andv16hi3;
48724 gen_pack = gen_avx2_packuswb;
48725 gen_shift = gen_lshrv16hi3;
48726 end_perm = true;
48727 break;
48728 default:
48729 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
48730 general shuffles. */
48731 return false;
48734 /* Check that permutation is even or odd. */
48735 odd = d->perm[0];
48736 if (odd > 1)
48737 return false;
48739 for (i = 1; i < nelt; ++i)
48740 if (d->perm[i] != 2 * i + odd)
48741 return false;
48743 if (d->testing_p)
48744 return true;
48746 dop0 = gen_reg_rtx (half_mode);
48747 dop1 = gen_reg_rtx (half_mode);
48748 if (odd == 0)
48750 for (i = 0; i < nelt / 2; i++)
48751 rperm[i] = GEN_INT (c);
48752 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
48753 t = force_reg (half_mode, t);
48754 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
48755 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
48757 else
48759 emit_insn (gen_shift (dop0,
48760 gen_lowpart (half_mode, d->op0),
48761 GEN_INT (s)));
48762 emit_insn (gen_shift (dop1,
48763 gen_lowpart (half_mode, d->op1),
48764 GEN_INT (s)));
48766 /* For the AVX2 256-bit case we need to permute the pack result, since the 256-bit pack insns operate within 128-bit lanes. */
48767 if (TARGET_AVX2 && end_perm)
48769 op = gen_reg_rtx (d->vmode);
48770 t = gen_reg_rtx (V4DImode);
48771 emit_insn (gen_pack (op, dop0, dop1));
48772 emit_insn (gen_avx2_permv4di_1 (t,
48773 gen_lowpart (V4DImode, op),
48774 const0_rtx,
48775 const2_rtx,
48776 const1_rtx,
48777 GEN_INT (3)));
48778 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
48780 else
48781 emit_insn (gen_pack (d->target, dop0, dop1));
48783 return true;
48786 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
48787 and extract-odd permutations. */
48789 static bool
48790 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48792 rtx t1, t2, t3, t4, t5;
48794 switch (d->vmode)
48796 case V4DFmode:
48797 if (d->testing_p)
48798 break;
48799 t1 = gen_reg_rtx (V4DFmode);
48800 t2 = gen_reg_rtx (V4DFmode);
48802 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48803 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48804 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48806 /* Now an unpck[lh]pd will produce the result required. */
48807 if (odd)
48808 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48809 else
48810 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
48811 emit_insn (t3);
48812 break;
48814 case V8SFmode:
48816 int mask = odd ? 0xdd : 0x88;
48818 if (d->testing_p)
48819 break;
48820 t1 = gen_reg_rtx (V8SFmode);
48821 t2 = gen_reg_rtx (V8SFmode);
48822 t3 = gen_reg_rtx (V8SFmode);
48824 /* Shuffle within the 128-bit lanes to produce:
48825 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
48826 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48827 GEN_INT (mask)));
48829 /* Shuffle the lanes around to produce:
48830 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
48831 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48832 GEN_INT (0x3)));
48834 /* Shuffle within the 128-bit lanes to produce:
48835 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
48836 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48838 /* Shuffle within the 128-bit lanes to produce:
48839 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
48840 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48842 /* Shuffle the lanes around to produce:
48843 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
48844 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
48845 GEN_INT (0x20)));
48847 break;
48849 case V2DFmode:
48850 case V4SFmode:
48851 case V2DImode:
48852 case V4SImode:
48853 /* These are always directly implementable by expand_vec_perm_1. */
48854 gcc_unreachable ();
48856 case V8HImode:
48857 if (TARGET_SSE4_1)
48858 return expand_vec_perm_even_odd_pack (d);
48859 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48860 return expand_vec_perm_pshufb2 (d);
48861 else
48863 if (d->testing_p)
48864 break;
48865 /* We need 2*log2(N)-1 operations to achieve odd/even
48866 with interleave. */
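/* E.g. for { a0 ... a7 } and { b0 ... b7 }: the first two interleaves
   give { a4 b4 a5 b5 a6 b6 a7 b7 } and { a0 b0 a1 b1 a2 b2 a3 b3 };
   interleaving those gives { a2 a6 b2 b6 a3 a7 b3 b7 } and
   { a0 a4 b0 b4 a1 a5 b1 b5 }; the final low (resp. high) interleave
   yields { a0 a2 a4 a6 b0 b2 b4 b6 } (resp. { a1 a3 a5 a7 b1 b3 b5 b7 }). */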
48867 t1 = gen_reg_rtx (V8HImode);
48868 t2 = gen_reg_rtx (V8HImode);
48869 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48870 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48871 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48872 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48873 if (odd)
48874 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48875 else
48876 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
48877 emit_insn (t3);
48879 break;
48881 case V16QImode:
48882 return expand_vec_perm_even_odd_pack (d);
48884 case V16HImode:
48885 case V32QImode:
48886 return expand_vec_perm_even_odd_pack (d);
48888 case V4DImode:
48889 if (!TARGET_AVX2)
48891 struct expand_vec_perm_d d_copy = *d;
48892 d_copy.vmode = V4DFmode;
48893 if (d->testing_p)
48894 d_copy.target = gen_lowpart (V4DFmode, d->target);
48895 else
48896 d_copy.target = gen_reg_rtx (V4DFmode);
48897 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48898 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48899 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48901 if (!d->testing_p)
48902 emit_move_insn (d->target,
48903 gen_lowpart (V4DImode, d_copy.target));
48904 return true;
48906 return false;
48909 if (d->testing_p)
48910 break;
48912 t1 = gen_reg_rtx (V4DImode);
48913 t2 = gen_reg_rtx (V4DImode);
48915 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48916 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48917 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48919 /* Now a vpunpck[lh]qdq will produce the result required. */
48920 if (odd)
48921 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48922 else
48923 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
48924 emit_insn (t3);
48925 break;
48927 case V8SImode:
48928 if (!TARGET_AVX2)
48930 struct expand_vec_perm_d d_copy = *d;
48931 d_copy.vmode = V8SFmode;
48932 if (d->testing_p)
48933 d_copy.target = gen_lowpart (V8SFmode, d->target);
48934 else
48935 d_copy.target = gen_reg_rtx (V8SFmode);
48936 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48937 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48938 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48940 if (!d->testing_p)
48941 emit_move_insn (d->target,
48942 gen_lowpart (V8SImode, d_copy.target));
48943 return true;
48945 return false;
48948 if (d->testing_p)
48949 break;
48951 t1 = gen_reg_rtx (V8SImode);
48952 t2 = gen_reg_rtx (V8SImode);
48953 t3 = gen_reg_rtx (V4DImode);
48954 t4 = gen_reg_rtx (V4DImode);
48955 t5 = gen_reg_rtx (V4DImode);
48957 /* Shuffle the lanes around into
48958 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
48959 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
48960 gen_lowpart (V4DImode, d->op1),
48961 GEN_INT (0x20)));
48962 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
48963 gen_lowpart (V4DImode, d->op1),
48964 GEN_INT (0x31)));
48966 /* Swap the 2nd and 3rd position in each lane into
48967 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
48968 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
48969 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48970 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
48971 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48973 /* Now a vpunpck[lh]qdq will produce
48974 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
48975 if (odd)
48976 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
48977 gen_lowpart (V4DImode, t2));
48978 else
48979 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
48980 gen_lowpart (V4DImode, t2));
48981 emit_insn (t3);
48982 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
48983 break;
48985 default:
48986 gcc_unreachable ();
48989 return true;
48992 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
48993 extract-even and extract-odd permutations. */
48995 static bool
48996 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
48998 unsigned i, odd, nelt = d->nelt;
49000 odd = d->perm[0];
49001 if (odd != 0 && odd != 1)
49002 return false;
49004 for (i = 1; i < nelt; ++i)
49005 if (d->perm[i] != 2 * i + odd)
49006 return false;
49008 return expand_vec_perm_even_odd_1 (d, odd);
49011 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
49012 permutations. We assume that expand_vec_perm_1 has already failed. */
49014 static bool
49015 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
49017 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
49018 machine_mode vmode = d->vmode;
49019 unsigned char perm2[4];
49020 rtx op0 = d->op0, dest;
49021 bool ok;
49023 switch (vmode)
49025 case V4DFmode:
49026 case V8SFmode:
49027 /* These are special-cased in sse.md so that we can optionally
49028 use the vbroadcast instruction. They expand to two insns
49029 if the input happens to be in a register. */
49030 gcc_unreachable ();
49032 case V2DFmode:
49033 case V2DImode:
49034 case V4SFmode:
49035 case V4SImode:
49036 /* These are always implementable using standard shuffle patterns. */
49037 gcc_unreachable ();
49039 case V8HImode:
49040 case V16QImode:
49041 /* These can be implemented via interleave. We save one insn by
49042 stopping once we have promoted to V4SImode and then use pshufd. */
49043 if (d->testing_p)
49044 return true;
49047 rtx dest;
49048 rtx (*gen) (rtx, rtx, rtx)
49049 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
49050 : gen_vec_interleave_lowv8hi;
49052 if (elt >= nelt2)
49054 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
49055 : gen_vec_interleave_highv8hi;
49056 elt -= nelt2;
49058 nelt2 /= 2;
49060 dest = gen_reg_rtx (vmode);
49061 emit_insn (gen (dest, op0, op0));
49062 vmode = get_mode_wider_vector (vmode);
49063 op0 = gen_lowpart (vmode, dest);
49065 while (vmode != V4SImode);
49067 memset (perm2, elt, 4);
49068 dest = gen_reg_rtx (V4SImode);
49069 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
49070 gcc_assert (ok);
49071 if (!d->testing_p)
49072 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
49073 return true;
49075 case V64QImode:
49076 case V32QImode:
49077 case V16HImode:
49078 case V8SImode:
49079 case V4DImode:
49080 /* For AVX2 broadcasts of the first element vpbroadcast* or
49081 vpermq should be used by expand_vec_perm_1. */
49082 gcc_assert (!TARGET_AVX2 || d->perm[0]);
49083 return false;
49085 default:
49086 gcc_unreachable ();
49090 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
49091 broadcast permutations. */
49093 static bool
49094 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
49096 unsigned i, elt, nelt = d->nelt;
49098 if (!d->one_operand_p)
49099 return false;
49101 elt = d->perm[0];
49102 for (i = 1; i < nelt; ++i)
49103 if (d->perm[i] != elt)
49104 return false;
49106 return expand_vec_perm_broadcast_1 (d);
49109 /* Implement arbitrary permutations of two V64QImode operands
49110 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
49111 static bool
49112 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
49114 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
49115 return false;
49117 if (d->testing_p)
49118 return true;
49120 struct expand_vec_perm_d ds[2];
49121 rtx rperm[128], vperm, target0, target1;
49122 unsigned int i, nelt;
49123 machine_mode vmode;
49125 nelt = d->nelt;
49126 vmode = V64QImode;
49128 for (i = 0; i < 2; i++)
49130 ds[i] = *d;
49131 ds[i].vmode = V32HImode;
49132 ds[i].nelt = 32;
49133 ds[i].target = gen_reg_rtx (V32HImode);
49134 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
49135 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
49138 /* Prepare permutations such that the first one (ds[0]) takes care of
49139 putting the even bytes into the right positions or one position
49140 higher, and the second one (ds[1]) takes care of putting
49141 the odd bytes into the right positions or one position
49142 lower. */
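/* Concretely, if d->perm[0] == 7 then ds[0].perm[0] == 3, so the first
   vpermi2w puts source bytes { 6, 7 } at bytes { 0, 1 } of ds[0].target,
   and rperm[0] == (0 & 14) + (7 & 1) == 1 makes the following vpshufb
   pick byte 1 of that pair, i.e. the wanted byte 7. */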
49144 for (i = 0; i < nelt; i++)
49146 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
49147 if (i & 1)
49149 rperm[i] = constm1_rtx;
49150 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49152 else
49154 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49155 rperm[i + 64] = constm1_rtx;
49159 bool ok = expand_vec_perm_1 (&ds[0]);
49160 gcc_assert (ok);
49161 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49163 ok = expand_vec_perm_1 (&ds[1]);
49164 gcc_assert (ok);
49165 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
49167 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49168 vperm = force_reg (vmode, vperm);
49169 target0 = gen_reg_rtx (V64QImode);
49170 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49172 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49173 vperm = force_reg (vmode, vperm);
49174 target1 = gen_reg_rtx (V64QImode);
49175 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
49177 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
49178 return true;
49181 /* Implement an arbitrary permutation of two V32QImode or V16HImode operands
49182 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
49183 all the shorter instruction sequences. */
49185 static bool
49186 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49188 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49189 unsigned int i, nelt, eltsz;
49190 bool used[4];
49192 if (!TARGET_AVX2
49193 || d->one_operand_p
49194 || (d->vmode != V32QImode && d->vmode != V16HImode))
49195 return false;
49197 if (d->testing_p)
49198 return true;
49200 nelt = d->nelt;
49201 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
49203 /* Generate 4 permutation masks. If the required element is within
49204 the same lane, it is shuffled in. If the required element is from the
49205 other lane, force a zero by setting bit 7 in the permutation mask.
49206 The other masks have a non-negative element wherever an element
49207 is requested from the other lane, but also moved to the other lane,
49208 so that the result of vpshufb can have its two V2TImode halves
49209 swapped. */
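/* Of the four masks, rperm[0] and rperm[1] select from op0 while
   rperm[2] and rperm[3] select from op1; the even-numbered masks handle
   elements that stay within their lane, the odd-numbered masks handle
   cross-lane elements and their vpshufb results are lane-swapped with
   vpermq before everything is combined with vpor. */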
49210 m128 = GEN_INT (-128);
49211 for (i = 0; i < 32; ++i)
49213 rperm[0][i] = m128;
49214 rperm[1][i] = m128;
49215 rperm[2][i] = m128;
49216 rperm[3][i] = m128;
49218 used[0] = false;
49219 used[1] = false;
49220 used[2] = false;
49221 used[3] = false;
49222 for (i = 0; i < nelt; ++i)
49224 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49225 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49226 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49228 for (j = 0; j < eltsz; ++j)
49229 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49230 used[which] = true;
49233 for (i = 0; i < 2; ++i)
49235 if (!used[2 * i + 1])
49237 h[i] = NULL_RTX;
49238 continue;
49240 vperm = gen_rtx_CONST_VECTOR (V32QImode,
49241 gen_rtvec_v (32, rperm[2 * i + 1]));
49242 vperm = force_reg (V32QImode, vperm);
49243 h[i] = gen_reg_rtx (V32QImode);
49244 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49245 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
49248 /* Swap the 128-bit lanes of h[X]. */
49249 for (i = 0; i < 2; ++i)
49251 if (h[i] == NULL_RTX)
49252 continue;
49253 op = gen_reg_rtx (V4DImode);
49254 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49255 const2_rtx, GEN_INT (3), const0_rtx,
49256 const1_rtx));
49257 h[i] = gen_lowpart (V32QImode, op);
49260 for (i = 0; i < 2; ++i)
49262 if (!used[2 * i])
49264 l[i] = NULL_RTX;
49265 continue;
49267 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49268 vperm = force_reg (V32QImode, vperm);
49269 l[i] = gen_reg_rtx (V32QImode);
49270 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49271 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
49274 for (i = 0; i < 2; ++i)
49276 if (h[i] && l[i])
49278 op = gen_reg_rtx (V32QImode);
49279 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49280 l[i] = op;
49282 else if (h[i])
49283 l[i] = h[i];
49286 gcc_assert (l[0] && l[1]);
49287 op = d->target;
49288 if (d->vmode != V32QImode)
49289 op = gen_reg_rtx (V32QImode);
49290 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49291 if (op != d->target)
49292 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49293 return true;
49296 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49297 With all of the interface bits taken care of, perform the expansion
49298 in D and return true on success. */
49300 static bool
49301 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49303 /* Try a single instruction expansion. */
49304 if (expand_vec_perm_1 (d))
49305 return true;
49307 /* Try sequences of two instructions. */
49309 if (expand_vec_perm_pshuflw_pshufhw (d))
49310 return true;
49312 if (expand_vec_perm_palignr (d, false))
49313 return true;
49315 if (expand_vec_perm_interleave2 (d))
49316 return true;
49318 if (expand_vec_perm_broadcast (d))
49319 return true;
49321 if (expand_vec_perm_vpermq_perm_1 (d))
49322 return true;
49324 if (expand_vec_perm_vperm2f128 (d))
49325 return true;
49327 if (expand_vec_perm_pblendv (d))
49328 return true;
49330 /* Try sequences of three instructions. */
49332 if (expand_vec_perm_even_odd_pack (d))
49333 return true;
49335 if (expand_vec_perm_2vperm2f128_vshuf (d))
49336 return true;
49338 if (expand_vec_perm_pshufb2 (d))
49339 return true;
49341 if (expand_vec_perm_interleave3 (d))
49342 return true;
49344 if (expand_vec_perm_vperm2f128_vblend (d))
49345 return true;
49347 /* Try sequences of four instructions. */
49349 if (expand_vec_perm_vpshufb2_vpermq (d))
49350 return true;
49352 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49353 return true;
49355 if (expand_vec_perm_vpermi2_vpshub2 (d))
49356 return true;
49358 /* ??? Look for narrow permutations whose element orderings would
49359 allow the promotion to a wider mode. */
49361 /* ??? Look for sequences of interleave or a wider permute that place
49362 the data into the correct lanes for a half-vector shuffle like
49363 pshuf[lh]w or vpermilps. */
49365 /* ??? Look for sequences of interleave that produce the desired results.
49366 The combinatorics of punpck[lh] get pretty ugly... */
49368 if (expand_vec_perm_even_odd (d))
49369 return true;
49371 /* Even longer sequences. */
49372 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49373 return true;
49375 return false;
49378 /* If a permutation only uses one operand, make it clear. Returns true
49379 if the permutation references both operands. */
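/* E.g. with nelt == 4, a selector { 4, 5, 6, 7 } references only the
   second operand; it is folded to { 0, 1, 2, 3 } with op0 replaced by
   op1, one_operand_p set, and false returned. */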
49381 static bool
49382 canonicalize_perm (struct expand_vec_perm_d *d)
49384 int i, which, nelt = d->nelt;
49386 for (i = which = 0; i < nelt; ++i)
49387 which |= (d->perm[i] < nelt ? 1 : 2);
49389 d->one_operand_p = true;
49390 switch (which)
49392 default:
49393 gcc_unreachable();
49395 case 3:
49396 if (!rtx_equal_p (d->op0, d->op1))
49398 d->one_operand_p = false;
49399 break;
49401 /* The elements of PERM do not suggest that only the first operand
49402 is used, but both operands are identical. Allow easier matching
49403 of the permutation by folding the permutation into the single
49404 input vector. */
49405 /* FALLTHRU */
49407 case 2:
49408 for (i = 0; i < nelt; ++i)
49409 d->perm[i] &= nelt - 1;
49410 d->op0 = d->op1;
49411 break;
49413 case 1:
49414 d->op1 = d->op0;
49415 break;
49418 return (which == 3);
49421 bool
49422 ix86_expand_vec_perm_const (rtx operands[4])
49424 struct expand_vec_perm_d d;
49425 unsigned char perm[MAX_VECT_LEN];
49426 int i, nelt;
49427 bool two_args;
49428 rtx sel;
49430 d.target = operands[0];
49431 d.op0 = operands[1];
49432 d.op1 = operands[2];
49433 sel = operands[3];
49435 d.vmode = GET_MODE (d.target);
49436 gcc_assert (VECTOR_MODE_P (d.vmode));
49437 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49438 d.testing_p = false;
49440 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49441 gcc_assert (XVECLEN (sel, 0) == nelt);
49442 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
49444 for (i = 0; i < nelt; ++i)
49446 rtx e = XVECEXP (sel, 0, i);
49447 int ei = INTVAL (e) & (2 * nelt - 1);
49448 d.perm[i] = ei;
49449 perm[i] = ei;
49452 two_args = canonicalize_perm (&d);
49454 if (ix86_expand_vec_perm_const_1 (&d))
49455 return true;
49457 /* If the selector says both arguments are needed, but the operands are the
49458 same, the above tried to expand with one_operand_p and flattened selector.
49459 If that didn't work, retry without one_operand_p; we succeeded with that
49460 during testing. */
49461 if (two_args && d.one_operand_p)
49463 d.one_operand_p = false;
49464 memcpy (d.perm, perm, sizeof (perm));
49465 return ix86_expand_vec_perm_const_1 (&d);
49468 return false;
49471 /* Implement targetm.vectorize.vec_perm_const_ok. */
49473 static bool
49474 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49475 const unsigned char *sel)
49477 struct expand_vec_perm_d d;
49478 unsigned int i, nelt, which;
49479 bool ret;
49481 d.vmode = vmode;
49482 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49483 d.testing_p = true;
49485 /* Given sufficient ISA support we can just return true here
49486 for selected vector modes. */
49487 switch (d.vmode)
49489 case V16SFmode:
49490 case V16SImode:
49491 case V8DImode:
49492 case V8DFmode:
49493 if (TARGET_AVX512F)
49494 /* All implementable with a single vpermi2 insn. */
49495 return true;
49496 break;
49497 case V32HImode:
49498 if (TARGET_AVX512BW)
49499 /* All implementable with a single vpermi2 insn. */
49500 return true;
49501 break;
49502 case V64QImode:
49503 if (TARGET_AVX512BW)
49504 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
49505 return true;
49506 break;
49507 case V8SImode:
49508 case V8SFmode:
49509 case V4DFmode:
49510 case V4DImode:
49511 if (TARGET_AVX512VL)
49512 /* All implementable with a single vpermi2 insn. */
49513 return true;
49514 break;
49515 case V16HImode:
49516 if (TARGET_AVX2)
49517 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49518 return true;
49519 break;
49520 case V32QImode:
49521 if (TARGET_AVX2)
49522 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49523 return true;
49524 break;
49525 case V4SImode:
49526 case V4SFmode:
49527 case V8HImode:
49528 case V16QImode:
49529 /* All implementable with a single vpperm insn. */
49530 if (TARGET_XOP)
49531 return true;
49532 /* All implementable with 2 pshufb + 1 ior. */
49533 if (TARGET_SSSE3)
49534 return true;
49535 break;
49536 case V2DImode:
49537 case V2DFmode:
49538 /* All implementable with shufpd or unpck[lh]pd. */
49539 return true;
49540 default:
49541 return false;
49544 /* Extract the values from the vector CST into the permutation
49545 array in D. */
49546 memcpy (d.perm, sel, nelt);
49547 for (i = which = 0; i < nelt; ++i)
49549 unsigned char e = d.perm[i];
49550 gcc_assert (e < 2 * nelt);
49551 which |= (e < nelt ? 1 : 2);
49554 /* For all elements from second vector, fold the elements to first. */
49555 if (which == 2)
49556 for (i = 0; i < nelt; ++i)
49557 d.perm[i] -= nelt;
49559 /* Check whether the mask can be applied to the vector type. */
49560 d.one_operand_p = (which != 3);
49562 /* Implementable with shufps or pshufd. */
49563 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49564 return true;
49566 /* Otherwise we have to go through the motions and see if we can
49567 figure out how to generate the requested permutation. */
49568 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49569 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49570 if (!d.one_operand_p)
49571 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49573 start_sequence ();
49574 ret = ix86_expand_vec_perm_const_1 (&d);
49575 end_sequence ();
49577 return ret;
49580 void
49581 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49583 struct expand_vec_perm_d d;
49584 unsigned i, nelt;
49586 d.target = targ;
49587 d.op0 = op0;
49588 d.op1 = op1;
49589 d.vmode = GET_MODE (targ);
49590 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49591 d.one_operand_p = false;
49592 d.testing_p = false;
49594 for (i = 0; i < nelt; ++i)
49595 d.perm[i] = i * 2 + odd;
49597 /* We'll either be able to implement the permutation directly... */
49598 if (expand_vec_perm_1 (&d))
49599 return;
49601 /* ... or we use the special-case patterns. */
49602 expand_vec_perm_even_odd_1 (&d, odd);
49605 static void
49606 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49608 struct expand_vec_perm_d d;
49609 unsigned i, nelt, base;
49610 bool ok;
49612 d.target = targ;
49613 d.op0 = op0;
49614 d.op1 = op1;
49615 d.vmode = GET_MODE (targ);
49616 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49617 d.one_operand_p = false;
49618 d.testing_p = false;
49620 base = high_p ? nelt / 2 : 0;
49621 for (i = 0; i < nelt / 2; ++i)
49623 d.perm[i * 2] = i + base;
49624 d.perm[i * 2 + 1] = i + base + nelt;
49627 /* Note that for AVX this isn't one instruction. */
49628 ok = ix86_expand_vec_perm_const_1 (&d);
49629 gcc_assert (ok);
49633 /* Expand a vector operation CODE for a V*QImode in terms of the
49634 same operation on V*HImode. */
49636 void
49637 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49639 machine_mode qimode = GET_MODE (dest);
49640 machine_mode himode;
49641 rtx (*gen_il) (rtx, rtx, rtx);
49642 rtx (*gen_ih) (rtx, rtx, rtx);
49643 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49644 struct expand_vec_perm_d d;
49645 bool ok, full_interleave;
49646 bool uns_p = false;
49647 int i;
49649 switch (qimode)
49651 case V16QImode:
49652 himode = V8HImode;
49653 gen_il = gen_vec_interleave_lowv16qi;
49654 gen_ih = gen_vec_interleave_highv16qi;
49655 break;
49656 case V32QImode:
49657 himode = V16HImode;
49658 gen_il = gen_avx2_interleave_lowv32qi;
49659 gen_ih = gen_avx2_interleave_highv32qi;
49660 break;
49661 case V64QImode:
49662 himode = V32HImode;
49663 gen_il = gen_avx512bw_interleave_lowv64qi;
49664 gen_ih = gen_avx512bw_interleave_highv64qi;
49665 break;
49666 default:
49667 gcc_unreachable ();
49670 op2_l = op2_h = op2;
49671 switch (code)
49673 case MULT:
49674 /* Unpack data such that we've got a source byte in each low byte of
49675 each word. We don't care what goes into the high byte of each word.
49676 Rather than trying to get zero in there, most convenient is to let
49677 it be a copy of the low byte. */
49678 op2_l = gen_reg_rtx (qimode);
49679 op2_h = gen_reg_rtx (qimode);
49680 emit_insn (gen_il (op2_l, op2, op2));
49681 emit_insn (gen_ih (op2_h, op2, op2));
49682 /* FALLTHRU */
49684 op1_l = gen_reg_rtx (qimode);
49685 op1_h = gen_reg_rtx (qimode);
49686 emit_insn (gen_il (op1_l, op1, op1));
49687 emit_insn (gen_ih (op1_h, op1, op1));
49688 full_interleave = qimode == V16QImode;
49689 break;
49691 case ASHIFT:
49692 case LSHIFTRT:
49693 uns_p = true;
49694 /* FALLTHRU */
49695 case ASHIFTRT:
49696 op1_l = gen_reg_rtx (himode);
49697 op1_h = gen_reg_rtx (himode);
49698 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49699 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49700 full_interleave = true;
49701 break;
49702 default:
49703 gcc_unreachable ();
49706 /* Perform the operation. */
49707 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49708 1, OPTAB_DIRECT);
49709 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49710 1, OPTAB_DIRECT);
49711 gcc_assert (res_l && res_h);
49713 /* Merge the data back into the right place. */
49714 d.target = dest;
49715 d.op0 = gen_lowpart (qimode, res_l);
49716 d.op1 = gen_lowpart (qimode, res_h);
49717 d.vmode = qimode;
49718 d.nelt = GET_MODE_NUNITS (qimode);
49719 d.one_operand_p = false;
49720 d.testing_p = false;
49722 if (full_interleave)
49724 /* For SSE2, we used a full interleave, so the desired
49725 results are in the even elements. */
49726 for (i = 0; i < 64; ++i)
49727 d.perm[i] = i * 2;
49729 else
49731 /* For AVX, the interleave used above was not cross-lane. So the
49732 extraction we want is the even elements, but with the second and third quarters swapped.
49733 Happily, that is even one insn shorter than even extraction. */
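/* E.g. for V32QImode, res_l holds the products for source bytes 0-7 and
   16-23 and res_h those for 8-15 and 24-31 (one per word, in the low
   byte), so the even-byte extraction must read res_l lane 0, res_h
   lane 0, res_l lane 1, res_h lane 1 in that order; the +16/-16
   adjustment of the two middle quarters of the selector expresses
   exactly that. */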
49734 for (i = 0; i < 64; ++i)
49735 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
49738 ok = ix86_expand_vec_perm_const_1 (&d);
49739 gcc_assert (ok);
49741 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49742 gen_rtx_fmt_ee (code, qimode, op1, op2));
49745 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49746 if op is CONST_VECTOR with all odd elements equal to their
49747 preceding element. */
49749 static bool
49750 const_vector_equal_evenodd_p (rtx op)
49752 machine_mode mode = GET_MODE (op);
49753 int i, nunits = GET_MODE_NUNITS (mode);
49754 if (GET_CODE (op) != CONST_VECTOR
49755 || nunits != CONST_VECTOR_NUNITS (op))
49756 return false;
49757 for (i = 0; i < nunits; i += 2)
49758 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
49759 return false;
49760 return true;
49763 void
49764 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49765 bool uns_p, bool odd_p)
49767 machine_mode mode = GET_MODE (op1);
49768 machine_mode wmode = GET_MODE (dest);
49769 rtx x;
49770 rtx orig_op1 = op1, orig_op2 = op2;
49772 if (!nonimmediate_operand (op1, mode))
49773 op1 = force_reg (mode, op1);
49774 if (!nonimmediate_operand (op2, mode))
49775 op2 = force_reg (mode, op2);
49777 /* We only play even/odd games with vectors of SImode. */
49778 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49780 /* If we're looking for the odd results, shift those members down to
49781 the even slots. For some cpus this is faster than a PSHUFD. */
49782 if (odd_p)
49784 /* For XOP use vpmacsdqh, but only for smult, as it is only
49785 signed. */
49786 if (TARGET_XOP && mode == V4SImode && !uns_p)
49788 x = force_reg (wmode, CONST0_RTX (wmode));
49789 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
49790 return;
49793 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49794 if (!const_vector_equal_evenodd_p (orig_op1))
49795 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49796 x, NULL, 1, OPTAB_DIRECT);
49797 if (!const_vector_equal_evenodd_p (orig_op2))
49798 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49799 x, NULL, 1, OPTAB_DIRECT);
49800 op1 = gen_lowpart (mode, op1);
49801 op2 = gen_lowpart (mode, op2);
49804 if (mode == V16SImode)
49806 if (uns_p)
49807 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49808 else
49809 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49811 else if (mode == V8SImode)
49813 if (uns_p)
49814 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49815 else
49816 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49818 else if (uns_p)
49819 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49820 else if (TARGET_SSE4_1)
49821 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49822 else
49824 rtx s1, s2, t0, t1, t2;
49826 /* The easiest way to implement this without PMULDQ is to go through
49827 the motions as if we are performing a full 64-bit multiply. With
49828 the exception that we need to do less shuffling of the elements. */
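/* With s1 and s2 the compare results below (-1 in each negative element
   of op1 resp. op2), sign extension means (int64) a == (uint32) a
   + (s1 << 32) modulo 2^64, so a * b == umul (a, b)
   + ((s1 * b + s2 * a) << 32) modulo 2^64, which is the sequence of
   widening unsigned multiplies, add and shift implemented here. */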
49830 /* Compute the sign-extension, aka highparts, of the two operands. */
49831 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49832 op1, pc_rtx, pc_rtx);
49833 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49834 op2, pc_rtx, pc_rtx);
49836 /* Multiply LO(A) * HI(B), and vice-versa. */
49837 t1 = gen_reg_rtx (wmode);
49838 t2 = gen_reg_rtx (wmode);
49839 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49840 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49842 /* Multiply LO(A) * LO(B). */
49843 t0 = gen_reg_rtx (wmode);
49844 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49846 /* Combine and shift the highparts into place. */
49847 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49848 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49849 1, OPTAB_DIRECT);
49851 /* Combine high and low parts. */
49852 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
49853 return;
49855 emit_insn (x);
49858 void
49859 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49860 bool uns_p, bool high_p)
49862 machine_mode wmode = GET_MODE (dest);
49863 machine_mode mode = GET_MODE (op1);
49864 rtx t1, t2, t3, t4, mask;
49866 switch (mode)
49868 case V4SImode:
49869 t1 = gen_reg_rtx (mode);
49870 t2 = gen_reg_rtx (mode);
49871 if (TARGET_XOP && !uns_p)
49873 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49874 shuffle the elements once so that all elements are in the right
49875 place for immediate use: { A C B D }. */
49876 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49877 const1_rtx, GEN_INT (3)));
49878 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49879 const1_rtx, GEN_INT (3)));
49881 else
49883 /* Put the elements into place for the multiply. */
49884 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49885 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49886 high_p = false;
49888 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49889 break;
49891 case V8SImode:
49892 /* Shuffle the elements between the lanes. After this we
49893 have { A B E F | C D G H } for each operand. */
49894 t1 = gen_reg_rtx (V4DImode);
49895 t2 = gen_reg_rtx (V4DImode);
49896 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49897 const0_rtx, const2_rtx,
49898 const1_rtx, GEN_INT (3)));
49899 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49900 const0_rtx, const2_rtx,
49901 const1_rtx, GEN_INT (3)));
49903 /* Shuffle the elements within the lanes. After this we
49904 have { A A B B | C C D D } or { E E F F | G G H H }. */
49905 t3 = gen_reg_rtx (V8SImode);
49906 t4 = gen_reg_rtx (V8SImode);
49907 mask = GEN_INT (high_p
49908 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49909 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
49910 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49911 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49913 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
49914 break;
49916 case V8HImode:
49917 case V16HImode:
49918 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49919 uns_p, OPTAB_DIRECT);
49920 t2 = expand_binop (mode,
49921 uns_p ? umul_highpart_optab : smul_highpart_optab,
49922 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49923 gcc_assert (t1 && t2);
49925 t3 = gen_reg_rtx (mode);
49926 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49927 emit_move_insn (dest, gen_lowpart (wmode, t3));
49928 break;
49930 case V16QImode:
49931 case V32QImode:
49932 case V32HImode:
49933 case V16SImode:
49934 case V64QImode:
49935 t1 = gen_reg_rtx (wmode);
49936 t2 = gen_reg_rtx (wmode);
49937 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49938 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49940 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
49941 break;
49943 default:
49944 gcc_unreachable ();
49948 void
49949 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49951 rtx res_1, res_2, res_3, res_4;
49953 res_1 = gen_reg_rtx (V4SImode);
49954 res_2 = gen_reg_rtx (V4SImode);
49955 res_3 = gen_reg_rtx (V2DImode);
49956 res_4 = gen_reg_rtx (V2DImode);
49957 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
49958 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
49960 /* Move the results in element 2 down to element 1; we don't care
49961 what goes in elements 2 and 3. Then we can merge the parts
49962 back together with an interleave.
49964 Note that two other sequences were tried:
49965 (1) Use interleaves at the start instead of psrldq, which allows
49966 us to use a single shufps to merge things back at the end.
49967 (2) Use shufps here to combine the two vectors, then pshufd to
49968 put the elements in the correct order.
49969 In both cases the cost of the reformatting stall was too high
49970 and the overall sequence slower. */
49972 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
49973 const0_rtx, const2_rtx,
49974 const0_rtx, const0_rtx));
49975 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
49976 const0_rtx, const2_rtx,
49977 const0_rtx, const0_rtx));
49978 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
49980 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
49983 void
49984 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
49986 machine_mode mode = GET_MODE (op0);
49987 rtx t1, t2, t3, t4, t5, t6;
49989 if (TARGET_AVX512DQ && mode == V8DImode)
49990 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
49991 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
49992 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
49993 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
49994 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
49995 else if (TARGET_XOP && mode == V2DImode)
49997 /* op1: A,B,C,D, op2: E,F,G,H */
49998 op1 = gen_lowpart (V4SImode, op1);
49999 op2 = gen_lowpart (V4SImode, op2);
50001 t1 = gen_reg_rtx (V4SImode);
50002 t2 = gen_reg_rtx (V4SImode);
50003 t3 = gen_reg_rtx (V2DImode);
50004 t4 = gen_reg_rtx (V2DImode);
50006 /* t1: B,A,D,C */
50007 emit_insn (gen_sse2_pshufd_1 (t1, op1,
50008 GEN_INT (1),
50009 GEN_INT (0),
50010 GEN_INT (3),
50011 GEN_INT (2)));
50013 /* t2: (B*E),(A*F),(D*G),(C*H) */
50014 emit_insn (gen_mulv4si3 (t2, t1, op2));
50016 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
50017 emit_insn (gen_xop_phadddq (t3, t2));
50019 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
50020 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
50022 /* Multiply lower parts and add all */
50023 t5 = gen_reg_rtx (V2DImode);
50024 emit_insn (gen_vec_widen_umult_even_v4si (t5,
50025 gen_lowpart (V4SImode, op1),
50026 gen_lowpart (V4SImode, op2)));
50027 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
50030 else
50032 machine_mode nmode;
50033 rtx (*umul) (rtx, rtx, rtx);
50035 if (mode == V2DImode)
50037 umul = gen_vec_widen_umult_even_v4si;
50038 nmode = V4SImode;
50040 else if (mode == V4DImode)
50042 umul = gen_vec_widen_umult_even_v8si;
50043 nmode = V8SImode;
50045 else if (mode == V8DImode)
50047 umul = gen_vec_widen_umult_even_v16si;
50048 nmode = V16SImode;
50050 else
50051 gcc_unreachable ();
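/* The remaining code uses the identity
   (a_hi * 2^32 + a_lo) * (b_hi * 2^32 + b_lo)
   == a_lo * b_lo + ((a_hi * b_lo + a_lo * b_hi) << 32) modulo 2^64,
   built from the widening even-element unsigned multiplies selected
   above. */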
50054 /* Multiply low parts. */
50055 t1 = gen_reg_rtx (mode);
50056 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
50058 /* Shift input vectors right 32 bits so we can multiply high parts. */
50059 t6 = GEN_INT (32);
50060 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
50061 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
50063 /* Multiply high parts by low parts. */
50064 t4 = gen_reg_rtx (mode);
50065 t5 = gen_reg_rtx (mode);
50066 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
50067 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
50069 /* Combine and shift the highparts back. */
50070 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
50071 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
50073 /* Combine high and low parts. */
50074 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
50077 set_unique_reg_note (get_last_insn (), REG_EQUAL,
50078 gen_rtx_MULT (mode, op1, op2));
50081 /* Return 1 if control transfer instruction INSN
50082 should be encoded with the bnd prefix.
50083 If INSN is NULL, return 1 when control
50084 transfer instructions should be prefixed with
50085 bnd by default for the current function. */
50087 bool
50088 ix86_bnd_prefixed_insn_p (rtx insn)
50090 /* For call insns check special flag. */
50091 if (insn && CALL_P (insn))
50093 rtx call = get_call_rtx_from (insn);
50094 if (call)
50095 return CALL_EXPR_WITH_BOUNDS_P (call);
50098 /* All other insns are prefixed only if function is instrumented. */
50099 return chkp_function_instrumented_p (current_function_decl);
50102 /* Calculate integer abs() using only SSE2 instructions. */
50104 void
50105 ix86_expand_sse2_abs (rtx target, rtx input)
50107 machine_mode mode = GET_MODE (target);
50108 rtx tmp0, tmp1, x;
50110 switch (mode)
50112 /* For 32-bit signed integer X, the best way to calculate the absolute
50113 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
50114 case V4SImode:
50115 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
50116 GEN_INT (GET_MODE_BITSIZE
50117 (GET_MODE_INNER (mode)) - 1),
50118 NULL, 0, OPTAB_DIRECT);
50119 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
50120 NULL, 0, OPTAB_DIRECT);
50121 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
50122 target, 0, OPTAB_DIRECT);
50123 break;
50125 /* For 16-bit signed integer X, the best way to calculate the absolute
50126 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
50127 case V8HImode:
50128 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50130 x = expand_simple_binop (mode, SMAX, tmp0, input,
50131 target, 0, OPTAB_DIRECT);
50132 break;
50134 /* For 8-bit signed integer X, the best way to calculate the absolute
50135 value of X is min ((unsigned char) X, (unsigned char) (-X)),
50136 as SSE2 provides the PMINUB insn. */
50137 case V16QImode:
50138 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50140 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
50141 target, 0, OPTAB_DIRECT);
50142 break;
50144 default:
50145 gcc_unreachable ();
50148 if (x != target)
50149 emit_move_insn (target, x);
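/* Editor's note -- illustrative sketch, not part of the original source:
   the V4SImode idiom above, written out for a single scalar lane.  Assumes
   arithmetic right shift of negative signed values, which GCC provides.  */
#include <stdint.h>

static int32_t
abs32_branchless (int32_t x)
{
  int32_t sign = x >> 31;	/* 0 when x >= 0, -1 when x < 0.  */
  return (x ^ sign) - sign;	/* Complements the bits and adds 1 when negative.  */
}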
50152 /* Expand an insert into a vector register through pinsr insn.
50153 Return true if successful. */
50155 bool
50156 ix86_expand_pinsr (rtx *operands)
50158 rtx dst = operands[0];
50159 rtx src = operands[3];
50161 unsigned int size = INTVAL (operands[1]);
50162 unsigned int pos = INTVAL (operands[2]);
50164 if (GET_CODE (dst) == SUBREG)
50166 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50167 dst = SUBREG_REG (dst);
50170 if (GET_CODE (src) == SUBREG)
50171 src = SUBREG_REG (src);
50173 switch (GET_MODE (dst))
50175 case V16QImode:
50176 case V8HImode:
50177 case V4SImode:
50178 case V2DImode:
50180 machine_mode srcmode, dstmode;
50181 rtx (*pinsr)(rtx, rtx, rtx, rtx);
50183 srcmode = mode_for_size (size, MODE_INT, 0);
50185 switch (srcmode)
50187 case QImode:
50188 if (!TARGET_SSE4_1)
50189 return false;
50190 dstmode = V16QImode;
50191 pinsr = gen_sse4_1_pinsrb;
50192 break;
50194 case HImode:
50195 if (!TARGET_SSE2)
50196 return false;
50197 dstmode = V8HImode;
50198 pinsr = gen_sse2_pinsrw;
50199 break;
50201 case SImode:
50202 if (!TARGET_SSE4_1)
50203 return false;
50204 dstmode = V4SImode;
50205 pinsr = gen_sse4_1_pinsrd;
50206 break;
50208 case DImode:
50209 gcc_assert (TARGET_64BIT);
50210 if (!TARGET_SSE4_1)
50211 return false;
50212 dstmode = V2DImode;
50213 pinsr = gen_sse4_1_pinsrq;
50214 break;
50216 default:
50217 return false;
50220 rtx d = dst;
50221 if (GET_MODE (dst) != dstmode)
50222 d = gen_reg_rtx (dstmode);
50223 src = gen_lowpart (srcmode, src);
50225 pos /= size;
50227 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
50228 GEN_INT (1 << pos)));
50229 if (d != dst)
50230 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50231 return true;
50234 default:
50235 return false;
50239 /* This function returns the calling-ABI specific va_list type node,
50240 i.e. the FNDECL specific va_list type. */
50242 static tree
50243 ix86_fn_abi_va_list (tree fndecl)
50245 if (!TARGET_64BIT)
50246 return va_list_type_node;
50247 gcc_assert (fndecl != NULL_TREE);
50249 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50250 return ms_va_list_type_node;
50251 else
50252 return sysv_va_list_type_node;
50255 /* Returns the canonical va_list type specified by TYPE. If there
50256 is no valid TYPE provided, it returns NULL_TREE. */
50258 static tree
50259 ix86_canonical_va_list_type (tree type)
50261 tree wtype, htype;
50263 /* Resolve references and pointers to va_list type. */
50264 if (TREE_CODE (type) == MEM_REF)
50265 type = TREE_TYPE (type);
50266 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
50267 type = TREE_TYPE (type);
50268 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50269 type = TREE_TYPE (type);
50271 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50273 wtype = va_list_type_node;
50274 gcc_assert (wtype != NULL_TREE);
50275 htype = type;
50276 if (TREE_CODE (wtype) == ARRAY_TYPE)
50278 /* If va_list is an array type, the argument may have decayed
50279 to a pointer type, e.g. by being passed to another function.
50280 In that case, unwrap both types so that we can compare the
50281 underlying records. */
50282 if (TREE_CODE (htype) == ARRAY_TYPE
50283 || POINTER_TYPE_P (htype))
50285 wtype = TREE_TYPE (wtype);
50286 htype = TREE_TYPE (htype);
50289 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50290 return va_list_type_node;
50291 wtype = sysv_va_list_type_node;
50292 gcc_assert (wtype != NULL_TREE);
50293 htype = type;
50294 if (TREE_CODE (wtype) == ARRAY_TYPE)
50296 /* If va_list is an array type, the argument may have decayed
50297 to a pointer type, e.g. by being passed to another function.
50298 In that case, unwrap both types so that we can compare the
50299 underlying records. */
50300 if (TREE_CODE (htype) == ARRAY_TYPE
50301 || POINTER_TYPE_P (htype))
50303 wtype = TREE_TYPE (wtype);
50304 htype = TREE_TYPE (htype);
50307 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50308 return sysv_va_list_type_node;
50309 wtype = ms_va_list_type_node;
50310 gcc_assert (wtype != NULL_TREE);
50311 htype = type;
50312 if (TREE_CODE (wtype) == ARRAY_TYPE)
50314 /* If va_list is an array type, the argument may have decayed
50315 to a pointer type, e.g. by being passed to another function.
50316 In that case, unwrap both types so that we can compare the
50317 underlying records. */
50318 if (TREE_CODE (htype) == ARRAY_TYPE
50319 || POINTER_TYPE_P (htype))
50321 wtype = TREE_TYPE (wtype);
50322 htype = TREE_TYPE (htype);
50325 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50326 return ms_va_list_type_node;
50327 return NULL_TREE;
50329 return std_canonical_va_list_type (type);
50332 /* Iterate through the target-specific builtin types for va_list.
50333 IDX denotes the iterator, *PTREE is set to the type node of
50334 the va_list builtin, and *PNAME to its internal name.
50335 Returns zero if there is no element for this index, otherwise
50336 IDX should be increased upon the next call.
50337 Note, do not iterate a base builtin's name like __builtin_va_list.
50338 Used from c_common_nodes_and_builtins. */
50340 static int
50341 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50343 if (TARGET_64BIT)
50345 switch (idx)
50347 default:
50348 break;
50350 case 0:
50351 *ptree = ms_va_list_type_node;
50352 *pname = "__builtin_ms_va_list";
50353 return 1;
50355 case 1:
50356 *ptree = sysv_va_list_type_node;
50357 *pname = "__builtin_sysv_va_list";
50358 return 1;
50362 return 0;
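/* Editor's note -- illustrative sketch, not part of the original source:
   how the two 64-bit va_list flavours registered above appear in user
   code.  The builtin and attribute names are the ones exposed here; the
   function itself and the __builtin_ms_va_start/_end usage follow the
   usual callabi style and are given as an assumption.  */
__attribute__((ms_abi)) static int
sum_ms (int n, ...)
{
  __builtin_ms_va_list ap;	/* ms_abi functions use the MS-style va_list.  */
  int i, s = 0;

  __builtin_ms_va_start (ap, n);
  for (i = 0; i < n; i++)
    s += __builtin_va_arg (ap, int);
  __builtin_ms_va_end (ap);
  return s;
}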
50365 #undef TARGET_SCHED_DISPATCH
50366 #define TARGET_SCHED_DISPATCH has_dispatch
50367 #undef TARGET_SCHED_DISPATCH_DO
50368 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50369 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50370 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50371 #undef TARGET_SCHED_REORDER
50372 #define TARGET_SCHED_REORDER ix86_sched_reorder
50373 #undef TARGET_SCHED_ADJUST_PRIORITY
50374 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50375 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50376 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50377 ix86_dependencies_evaluation_hook
50379 /* The size of the dispatch window is the total number of bytes of
50380 object code allowed in a window. */
50381 #define DISPATCH_WINDOW_SIZE 16
50383 /* Number of dispatch windows considered for scheduling. */
50384 #define MAX_DISPATCH_WINDOWS 3
50386 /* Maximum number of instructions in a window. */
50387 #define MAX_INSN 4
50389 /* Maximum number of immediate operands in a window. */
50390 #define MAX_IMM 4
50392 /* Maximum number of immediate bits allowed in a window. */
50393 #define MAX_IMM_SIZE 128
50395 /* Maximum number of 32 bit immediates allowed in a window. */
50396 #define MAX_IMM_32 4
50398 /* Maximum number of 64 bit immediates allowed in a window. */
50399 #define MAX_IMM_64 2
50401 /* Maximum total of loads or prefetches allowed in a window. */
50402 #define MAX_LOAD 2
50404 /* Maximum total of stores allowed in a window. */
50405 #define MAX_STORE 1
50407 #undef BIG
50408 #define BIG 100
50411 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50412 enum dispatch_group {
50413 disp_no_group = 0,
50414 disp_load,
50415 disp_store,
50416 disp_load_store,
50417 disp_prefetch,
50418 disp_imm,
50419 disp_imm_32,
50420 disp_imm_64,
50421 disp_branch,
50422 disp_cmp,
50423 disp_jcc,
50424 disp_last
50427 /* Number of allowable groups in a dispatch window. It is an array
50428 indexed by dispatch_group enum. 100 is used as a big number,
50429 because the number of these kinds of operations does not have any
50430 effect on the dispatch window, but we need them for other reasons in
50431 the table. */
50432 static unsigned int num_allowable_groups[disp_last] = {
50433 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
50436 char group_name[disp_last + 1][16] = {
50437 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50438 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50439 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50442 /* Instruction path. */
50443 enum insn_path {
50444 no_path = 0,
50445 path_single, /* Single micro op. */
50446 path_double, /* Double micro op. */
50447 path_multi, /* Instructions with more than 2 micro ops. */
50448 last_path
50451 /* sched_insn_info defines a window to the instructions scheduled in
50452 the basic block. It contains a pointer to the insn_info table and
50453 the instruction scheduled.
50455 Windows are allocated for each basic block and are linked
50456 together. */
50457 typedef struct sched_insn_info_s {
50458 rtx insn;
50459 enum dispatch_group group;
50460 enum insn_path path;
50461 int byte_len;
50462 int imm_bytes;
50463 } sched_insn_info;
50465 /* Linked list of dispatch windows. This is a two way list of
50466 dispatch windows of a basic block. It contains information about
50467 the number of uops in the window and the total number of
50468 instructions and of bytes in the object code for this dispatch
50469 window. */
50470 typedef struct dispatch_windows_s {
50471 int num_insn; /* Number of insns in the window. */
50472 int num_uops; /* Number of uops in the window. */
50473 int window_size; /* Number of bytes in the window. */
50474 int window_num; /* Window number, either 0 or 1. */
50475 int num_imm; /* Number of immediates in the window. */
50476 int num_imm_32; /* Number of 32 bit immediates in the window. */
50477 int num_imm_64; /* Number of 64 bit immediates in the window. */
50478 int imm_size; /* Total size in bytes of immediates in the window. */
50479 int num_loads; /* Total memory loads in the window. */
50480 int num_stores; /* Total memory stores in the window. */
50481 int violation; /* Violation exists in window. */
50482 sched_insn_info *window; /* Pointer to the window. */
50483 struct dispatch_windows_s *next;
50484 struct dispatch_windows_s *prev;
50485 } dispatch_windows;
50487 /* Immediate values used in an insn. */
50488 typedef struct imm_info_s
50490 int imm;
50491 int imm32;
50492 int imm64;
50493 } imm_info;
50495 static dispatch_windows *dispatch_window_list;
50496 static dispatch_windows *dispatch_window_list1;
50498 /* Get dispatch group of insn. */
50500 static enum dispatch_group
50501 get_mem_group (rtx_insn *insn)
50503 enum attr_memory memory;
50505 if (INSN_CODE (insn) < 0)
50506 return disp_no_group;
50507 memory = get_attr_memory (insn);
50508 if (memory == MEMORY_STORE)
50509 return disp_store;
50511 if (memory == MEMORY_LOAD)
50512 return disp_load;
50514 if (memory == MEMORY_BOTH)
50515 return disp_load_store;
50517 return disp_no_group;
50520 /* Return true if insn is a compare instruction. */
50522 static bool
50523 is_cmp (rtx_insn *insn)
50525 enum attr_type type;
50527 type = get_attr_type (insn);
50528 return (type == TYPE_TEST
50529 || type == TYPE_ICMP
50530 || type == TYPE_FCMP
50531 || GET_CODE (PATTERN (insn)) == COMPARE);
50534 /* Return true if a dispatch violation encountered. */
50536 static bool
50537 dispatch_violation (void)
50539 if (dispatch_window_list->next)
50540 return dispatch_window_list->next->violation;
50541 return dispatch_window_list->violation;
50544 /* Return true if insn is a branch instruction. */
50546 static bool
50547 is_branch (rtx insn)
50549 return (CALL_P (insn) || JUMP_P (insn));
50552 /* Return true if insn is a prefetch instruction. */
50554 static bool
50555 is_prefetch (rtx insn)
50557 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50560 /* This function initializes a dispatch window and the list container holding a
50561 pointer to the window. */
50563 static void
50564 init_window (int window_num)
50566 int i;
50567 dispatch_windows *new_list;
50569 if (window_num == 0)
50570 new_list = dispatch_window_list;
50571 else
50572 new_list = dispatch_window_list1;
50574 new_list->num_insn = 0;
50575 new_list->num_uops = 0;
50576 new_list->window_size = 0;
50577 new_list->next = NULL;
50578 new_list->prev = NULL;
50579 new_list->window_num = window_num;
50580 new_list->num_imm = 0;
50581 new_list->num_imm_32 = 0;
50582 new_list->num_imm_64 = 0;
50583 new_list->imm_size = 0;
50584 new_list->num_loads = 0;
50585 new_list->num_stores = 0;
50586 new_list->violation = false;
50588 for (i = 0; i < MAX_INSN; i++)
50590 new_list->window[i].insn = NULL;
50591 new_list->window[i].group = disp_no_group;
50592 new_list->window[i].path = no_path;
50593 new_list->window[i].byte_len = 0;
50594 new_list->window[i].imm_bytes = 0;
50596 return;
50599 /* This function allocates and initializes a dispatch window and the
50600 list container holding a pointer to the window. */
50602 static dispatch_windows *
50603 allocate_window (void)
50605 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
50606 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50608 return new_list;
50611 /* This routine initializes the dispatch scheduling information. It
50612 initiates building dispatch scheduler tables and constructs the
50613 first dispatch window. */
50615 static void
50616 init_dispatch_sched (void)
50618 /* Allocate a dispatch list and a window. */
50619 dispatch_window_list = allocate_window ();
50620 dispatch_window_list1 = allocate_window ();
50621 init_window (0);
50622 init_window (1);
50625 /* This function returns true if a branch is detected. End of a basic block
50626 does not have to be a branch, but here we assume only branches end a
50627 window. */
50629 static bool
50630 is_end_basic_block (enum dispatch_group group)
50632 return group == disp_branch;
50635 /* This function is called when the end of a window processing is reached. */
50637 static void
50638 process_end_window (void)
50640 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
50641 if (dispatch_window_list->next)
50643 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
50644 gcc_assert (dispatch_window_list->window_size
50645 + dispatch_window_list1->window_size <= 48);
50646 init_window (1);
50648 init_window (0);
50651 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
50652 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
50653 for 48 bytes of instructions. Note that these windows are not dispatch
50654 windows whose sizes are DISPATCH_WINDOW_SIZE. */
50656 static dispatch_windows *
50657 allocate_next_window (int window_num)
50659 if (window_num == 0)
50661 if (dispatch_window_list->next)
50662 init_window (1);
50663 init_window (0);
50664 return dispatch_window_list;
50667 dispatch_window_list->next = dispatch_window_list1;
50668 dispatch_window_list1->prev = dispatch_window_list;
50670 return dispatch_window_list1;
50673 /* Compute number of immediate operands of an instruction. */
50675 static void
50676 find_constant (rtx in_rtx, imm_info *imm_values)
50678 if (INSN_P (in_rtx))
50679 in_rtx = PATTERN (in_rtx);
50680 subrtx_iterator::array_type array;
50681 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
50682 if (const_rtx x = *iter)
50683 switch (GET_CODE (x))
50685 case CONST:
50686 case SYMBOL_REF:
50687 case CONST_INT:
50688 (imm_values->imm)++;
50689 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
50690 (imm_values->imm32)++;
50691 else
50692 (imm_values->imm64)++;
50693 break;
50695 case CONST_DOUBLE:
50696 (imm_values->imm)++;
50697 (imm_values->imm64)++;
50698 break;
50700 case CODE_LABEL:
50701 if (LABEL_KIND (x) == LABEL_NORMAL)
50703 (imm_values->imm)++;
50704 (imm_values->imm32)++;
50706 break;
50708 default:
50709 break;
50713 /* Return total size of immediate operands of an instruction along with number
50714 of corresponding immediate-operands. It initializes its parameters to zero
50715 before calling FIND_CONSTANT.
50716 INSN is the input instruction. IMM is the total of immediates.
50717 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
50718 bit immediates. */
50720 static int
50721 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
50723 imm_info imm_values = {0, 0, 0};
50725 find_constant (insn, &imm_values);
50726 *imm = imm_values.imm;
50727 *imm32 = imm_values.imm32;
50728 *imm64 = imm_values.imm64;
50729 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
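/* Editor's note -- illustrative worked example, not part of the original
   source: an insn carrying one constant that satisfies
   x86_64_immediate_operand in SImode and one constant that does not yields
   *IMM = 2, *IMM32 = 1, *IMM64 = 1, and the function returns
   1 * 4 + 1 * 8 = 12 bytes of immediate data.  */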
50732 /* This function indicates whether any operand of instruction INSN is an
50733 immediate. */
50735 static bool
50736 has_immediate (rtx insn)
50738 int num_imm_operand;
50739 int num_imm32_operand;
50740 int num_imm64_operand;
50742 if (insn)
50743 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50744 &num_imm64_operand);
50745 return false;
50748 /* Return single or double path for instructions. */
50750 static enum insn_path
50751 get_insn_path (rtx_insn *insn)
50753 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
50755 if ((int)path == 0)
50756 return path_single;
50758 if ((int)path == 1)
50759 return path_double;
50761 return path_multi;
50764 /* Return insn dispatch group. */
50766 static enum dispatch_group
50767 get_insn_group (rtx_insn *insn)
50769 enum dispatch_group group = get_mem_group (insn);
50770 if (group)
50771 return group;
50773 if (is_branch (insn))
50774 return disp_branch;
50776 if (is_cmp (insn))
50777 return disp_cmp;
50779 if (has_immediate (insn))
50780 return disp_imm;
50782 if (is_prefetch (insn))
50783 return disp_prefetch;
50785 return disp_no_group;
50788 /* Count number of GROUP restricted instructions in a dispatch
50789 window WINDOW_LIST. */
50791 static int
50792 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
50794 enum dispatch_group group = get_insn_group (insn);
50795 int imm_size;
50796 int num_imm_operand;
50797 int num_imm32_operand;
50798 int num_imm64_operand;
50800 if (group == disp_no_group)
50801 return 0;
50803 if (group == disp_imm)
50805 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50806 &num_imm64_operand);
50807 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
50808 || num_imm_operand + window_list->num_imm > MAX_IMM
50809 || (num_imm32_operand > 0
50810 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
50811 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
50812 || (num_imm64_operand > 0
50813 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
50814 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
50815 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
50816 && num_imm64_operand > 0
50817 && ((window_list->num_imm_64 > 0
50818 && window_list->num_insn >= 2)
50819 || window_list->num_insn >= 3)))
50820 return BIG;
50822 return 1;
50825 if ((group == disp_load_store
50826 && (window_list->num_loads >= MAX_LOAD
50827 || window_list->num_stores >= MAX_STORE))
50828 || ((group == disp_load
50829 || group == disp_prefetch)
50830 && window_list->num_loads >= MAX_LOAD)
50831 || (group == disp_store
50832 && window_list->num_stores >= MAX_STORE))
50833 return BIG;
50835 return 1;
50838 /* This function returns true if insn satisfies dispatch rules on the
50839 last window scheduled. */
50841 static bool
50842 fits_dispatch_window (rtx_insn *insn)
50844 dispatch_windows *window_list = dispatch_window_list;
50845 dispatch_windows *window_list_next = dispatch_window_list->next;
50846 unsigned int num_restrict;
50847 enum dispatch_group group = get_insn_group (insn);
50848 enum insn_path path = get_insn_path (insn);
50849 int sum;
50851 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
50852 instructions should be given the lowest priority in the
50853 scheduling process in Haifa scheduler to make sure they will be
50854 scheduled in the same dispatch window as the reference to them. */
50855 if (group == disp_jcc || group == disp_cmp)
50856 return false;
50858 /* Check nonrestricted. */
50859 if (group == disp_no_group || group == disp_branch)
50860 return true;
50862 /* Get last dispatch window. */
50863 if (window_list_next)
50864 window_list = window_list_next;
50866 if (window_list->window_num == 1)
50868 sum = window_list->prev->window_size + window_list->window_size;
50870 if (sum == 32
50871 || (min_insn_size (insn) + sum) >= 48)
50872 /* Window 1 is full. Go for next window. */
50873 return true;
50876 num_restrict = count_num_restricted (insn, window_list);
50878 if (num_restrict > num_allowable_groups[group])
50879 return false;
50881 /* See if it fits in the first window. */
50882 if (window_list->window_num == 0)
50884 /* The first window should have only single and double path
50885 uops. */
50886 if (path == path_double
50887 && (window_list->num_uops + 2) > MAX_INSN)
50888 return false;
50889 else if (path != path_single)
50890 return false;
50892 return true;
50895 /* Add an instruction INSN with NUM_UOPS micro-operations to the
50896 dispatch window WINDOW_LIST. */
50898 static void
50899 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
50901 int byte_len = min_insn_size (insn);
50902 int num_insn = window_list->num_insn;
50903 int imm_size;
50904 sched_insn_info *window = window_list->window;
50905 enum dispatch_group group = get_insn_group (insn);
50906 enum insn_path path = get_insn_path (insn);
50907 int num_imm_operand;
50908 int num_imm32_operand;
50909 int num_imm64_operand;
50911 if (!window_list->violation && group != disp_cmp
50912 && !fits_dispatch_window (insn))
50913 window_list->violation = true;
50915 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50916 &num_imm64_operand);
50918 /* Initialize window with new instruction. */
50919 window[num_insn].insn = insn;
50920 window[num_insn].byte_len = byte_len;
50921 window[num_insn].group = group;
50922 window[num_insn].path = path;
50923 window[num_insn].imm_bytes = imm_size;
50925 window_list->window_size += byte_len;
50926 window_list->num_insn = num_insn + 1;
50927 window_list->num_uops = window_list->num_uops + num_uops;
50928 window_list->imm_size += imm_size;
50929 window_list->num_imm += num_imm_operand;
50930 window_list->num_imm_32 += num_imm32_operand;
50931 window_list->num_imm_64 += num_imm64_operand;
50933 if (group == disp_store)
50934 window_list->num_stores += 1;
50935 else if (group == disp_load
50936 || group == disp_prefetch)
50937 window_list->num_loads += 1;
50938 else if (group == disp_load_store)
50940 window_list->num_stores += 1;
50941 window_list->num_loads += 1;
50945 /* Adds a scheduled instruction, INSN, to the current dispatch window.
50946 If the total bytes of instructions or the number of instructions in
50947 the window exceed the allowable limits, it allocates a new window. */
50949 static void
50950 add_to_dispatch_window (rtx_insn *insn)
50952 int byte_len;
50953 dispatch_windows *window_list;
50954 dispatch_windows *next_list;
50955 dispatch_windows *window0_list;
50956 enum insn_path path;
50957 enum dispatch_group insn_group;
50958 bool insn_fits;
50959 int num_insn;
50960 int num_uops;
50961 int window_num;
50962 int insn_num_uops;
50963 int sum;
50965 if (INSN_CODE (insn) < 0)
50966 return;
50968 byte_len = min_insn_size (insn);
50969 window_list = dispatch_window_list;
50970 next_list = window_list->next;
50971 path = get_insn_path (insn);
50972 insn_group = get_insn_group (insn);
50974 /* Get the last dispatch window. */
50975 if (next_list)
50976 window_list = dispatch_window_list->next;
50978 if (path == path_single)
50979 insn_num_uops = 1;
50980 else if (path == path_double)
50981 insn_num_uops = 2;
50982 else
50983 insn_num_uops = (int) path;
50985 /* If the current window is full, get a new window.
50986 Window number zero is full if MAX_INSN uops are scheduled in it.
50987 Window number one is full if window zero's bytes plus window
50988 one's bytes equal 32, or if adding the bytes of the new instruction
50989 makes the total greater than 48, or if it already has MAX_INSN
50990 instructions in it. */
50991 num_insn = window_list->num_insn;
50992 num_uops = window_list->num_uops;
50993 window_num = window_list->window_num;
50994 insn_fits = fits_dispatch_window (insn);
50996 if (num_insn >= MAX_INSN
50997 || num_uops + insn_num_uops > MAX_INSN
50998 || !(insn_fits))
51000 window_num = ~window_num & 1;
51001 window_list = allocate_next_window (window_num);
51004 if (window_num == 0)
51006 add_insn_window (insn, window_list, insn_num_uops);
51007 if (window_list->num_insn >= MAX_INSN
51008 && insn_group == disp_branch)
51010 process_end_window ();
51011 return;
51014 else if (window_num == 1)
51016 window0_list = window_list->prev;
51017 sum = window0_list->window_size + window_list->window_size;
51018 if (sum == 32
51019 || (byte_len + sum) >= 48)
51021 process_end_window ();
51022 window_list = dispatch_window_list;
51025 add_insn_window (insn, window_list, insn_num_uops);
51027 else
51028 gcc_unreachable ();
51030 if (is_end_basic_block (insn_group))
51032 /* End of basic block is reached; do end-basic-block processing. */
51033 process_end_window ();
51034 return;
51038 /* Print the dispatch window, WINDOW_NUM, to FILE. */
51040 DEBUG_FUNCTION static void
51041 debug_dispatch_window_file (FILE *file, int window_num)
51043 dispatch_windows *list;
51044 int i;
51046 if (window_num == 0)
51047 list = dispatch_window_list;
51048 else
51049 list = dispatch_window_list1;
51051 fprintf (file, "Window #%d:\n", list->window_num);
51052 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
51053 list->num_insn, list->num_uops, list->window_size);
51054 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51055 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
51057 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
51058 list->num_stores);
51059 fprintf (file, " insn info:\n");
51061 for (i = 0; i < MAX_INSN; i++)
51063 if (!list->window[i].insn)
51064 break;
51065 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
51066 i, group_name[list->window[i].group],
51067 i, (void *)list->window[i].insn,
51068 i, list->window[i].path,
51069 i, list->window[i].byte_len,
51070 i, list->window[i].imm_bytes);
51074 /* Print to stdout a dispatch window. */
51076 DEBUG_FUNCTION void
51077 debug_dispatch_window (int window_num)
51079 debug_dispatch_window_file (stdout, window_num);
51082 /* Print INSN dispatch information to FILE. */
51084 DEBUG_FUNCTION static void
51085 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
51087 int byte_len;
51088 enum insn_path path;
51089 enum dispatch_group group;
51090 int imm_size;
51091 int num_imm_operand;
51092 int num_imm32_operand;
51093 int num_imm64_operand;
51095 if (INSN_CODE (insn) < 0)
51096 return;
51098 byte_len = min_insn_size (insn);
51099 path = get_insn_path (insn);
51100 group = get_insn_group (insn);
51101 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51102 &num_imm64_operand);
51104 fprintf (file, " insn info:\n");
51105 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
51106 group_name[group], path, byte_len);
51107 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51108 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
51111 /* Print to STDERR the status of the ready list with respect to
51112 dispatch windows. */
51114 DEBUG_FUNCTION void
51115 debug_ready_dispatch (void)
51117 int i;
51118 int no_ready = number_in_ready ();
51120 fprintf (stdout, "Number of ready: %d\n", no_ready);
51122 for (i = 0; i < no_ready; i++)
51123 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
51126 /* This routine is the driver of the dispatch scheduler. */
51128 static void
51129 do_dispatch (rtx_insn *insn, int mode)
51131 if (mode == DISPATCH_INIT)
51132 init_dispatch_sched ();
51133 else if (mode == ADD_TO_DISPATCH_WINDOW)
51134 add_to_dispatch_window (insn);
51137 /* Return TRUE if Dispatch Scheduling is supported. */
51139 static bool
51140 has_dispatch (rtx_insn *insn, int action)
51142 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
51143 && flag_dispatch_scheduler)
51144 switch (action)
51146 default:
51147 return false;
51149 case IS_DISPATCH_ON:
51150 return true;
51151 break;
51153 case IS_CMP:
51154 return is_cmp (insn);
51156 case DISPATCH_VIOLATION:
51157 return dispatch_violation ();
51159 case FITS_DISPATCH_WINDOW:
51160 return fits_dispatch_window (insn);
51163 return false;
51166 /* Implementation of reassociation_width target hook used by
51167 reassoc phase to identify parallelism level in reassociated
51168 tree. The statement's tree code is passed in OPC; the argument type
51169 is passed in MODE.
51171 Currently parallel reassociation is enabled for Atom
51172 processors only and we set reassociation width to be 2
51173 because Atom may issue up to 2 instructions per cycle.
51175 Return value should be fixed if parallel reassociation is
51176 enabled for other processors. */
51178 static int
51179 ix86_reassociation_width (unsigned int, machine_mode mode)
51181 /* Vector part. */
51182 if (VECTOR_MODE_P (mode))
51184 if (TARGET_VECTOR_PARALLEL_EXECUTION)
51185 return 2;
51186 else
51187 return 1;
51190 /* Scalar part. */
51191 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51192 return 2;
51193 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
51194 return 2;
51195 else
51196 return 1;
51199 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51200 place emms and femms instructions. */
51202 static machine_mode
51203 ix86_preferred_simd_mode (machine_mode mode)
51205 if (!TARGET_SSE)
51206 return word_mode;
51208 switch (mode)
51210 case QImode:
51211 return TARGET_AVX512BW ? V64QImode :
51212 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51213 case HImode:
51214 return TARGET_AVX512BW ? V32HImode :
51215 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51216 case SImode:
51217 return TARGET_AVX512F ? V16SImode :
51218 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51219 case DImode:
51220 return TARGET_AVX512F ? V8DImode :
51221 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51223 case SFmode:
51224 if (TARGET_AVX512F)
51225 return V16SFmode;
51226 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51227 return V8SFmode;
51228 else
51229 return V4SFmode;
51231 case DFmode:
51232 if (!TARGET_VECTORIZE_DOUBLE)
51233 return word_mode;
51234 else if (TARGET_AVX512F)
51235 return V8DFmode;
51236 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51237 return V4DFmode;
51238 else if (TARGET_SSE2)
51239 return V2DFmode;
51240 /* FALLTHRU */
51242 default:
51243 return word_mode;
51247 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51248 vectors. If AVX512F is enabled then try vectorizing with 512bit,
51249 256bit and 128bit vectors. */
51251 static unsigned int
51252 ix86_autovectorize_vector_sizes (void)
51254 return TARGET_AVX512F ? 64 | 32 | 16 :
51255 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
51260 /* Return class of registers which could be used for pseudo of MODE
51261 and of class RCLASS for spilling instead of memory. Return NO_REGS
51262 if it is not possible or non-profitable. */
51263 static reg_class_t
51264 ix86_spill_class (reg_class_t rclass, machine_mode mode)
51266 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51267 && (mode == SImode || (TARGET_64BIT && mode == DImode))
51268 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51269 return ALL_SSE_REGS;
51270 return NO_REGS;
51273 /* Implement targetm.vectorize.init_cost. */
51275 static void *
51276 ix86_init_cost (struct loop *)
51278 unsigned *cost = XNEWVEC (unsigned, 3);
51279 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51280 return cost;
51283 /* Implement targetm.vectorize.add_stmt_cost. */
51285 static unsigned
51286 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51287 struct _stmt_vec_info *stmt_info, int misalign,
51288 enum vect_cost_model_location where)
51290 unsigned *cost = (unsigned *) data;
51291 unsigned retval = 0;
51293 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51294 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51296 /* Statements in an inner loop relative to the loop being
51297 vectorized are weighted more heavily. The value here is
51298 arbitrary and could potentially be improved with analysis. */
51299 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51300 count *= 50; /* FIXME. */
51302 retval = (unsigned) (count * stmt_cost);
51304 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
51305 for Silvermont, as it has an out-of-order integer pipeline and can execute
51306 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
51307 if (TARGET_SILVERMONT || TARGET_INTEL)
51308 if (stmt_info && stmt_info->stmt)
51310 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51311 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51312 retval = (retval * 17) / 10;
51315 cost[where] += retval;
51317 return retval;
51320 /* Implement targetm.vectorize.finish_cost. */
51322 static void
51323 ix86_finish_cost (void *data, unsigned *prologue_cost,
51324 unsigned *body_cost, unsigned *epilogue_cost)
51326 unsigned *cost = (unsigned *) data;
51327 *prologue_cost = cost[vect_prologue];
51328 *body_cost = cost[vect_body];
51329 *epilogue_cost = cost[vect_epilogue];
51332 /* Implement targetm.vectorize.destroy_cost_data. */
51334 static void
51335 ix86_destroy_cost_data (void *data)
51337 free (data);
51340 /* Validate target specific memory model bits in VAL. */
51342 static unsigned HOST_WIDE_INT
51343 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51345 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
51346 bool strong;
51348 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51349 |MEMMODEL_MASK)
51350 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51352 warning (OPT_Winvalid_memory_model,
51353 "Unknown architecture specific memory model");
51354 return MEMMODEL_SEQ_CST;
51356 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
51357 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
51359 warning (OPT_Winvalid_memory_model,
51360 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51361 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51363 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
51365 warning (OPT_Winvalid_memory_model,
51366 "HLE_RELEASE not used with RELEASE or stronger memory model");
51367 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51369 return val;
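/* Editor's note -- illustrative sketch, not part of the original source:
   the target-specific bits validated above correspond to the
   __ATOMIC_HLE_ACQUIRE / __ATOMIC_HLE_RELEASE macros users OR into the
   memory model of the __atomic builtins; the lock variable is a made-up
   example.  */
static int hle_lock;

static void
hle_lock_unlock (void)
{
  /* The HLE acquire bit must accompany an ACQUIRE or stronger model.  */
  while (__atomic_exchange_n (&hle_lock, 1,
			      __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
    ;
  /* ... critical section ...  */
  /* The HLE release bit must accompany a RELEASE or stronger model.  */
  __atomic_store_n (&hle_lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
}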
51372 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51373 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51374 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51375 or number of vecsize_mangle variants that should be emitted. */
51377 static int
51378 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51379 struct cgraph_simd_clone *clonei,
51380 tree base_type, int num)
51382 int ret = 1;
51384 if (clonei->simdlen
51385 && (clonei->simdlen < 2
51386 || clonei->simdlen > 16
51387 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51389 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51390 "unsupported simdlen %d", clonei->simdlen);
51391 return 0;
51394 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51395 if (TREE_CODE (ret_type) != VOID_TYPE)
51396 switch (TYPE_MODE (ret_type))
51398 case QImode:
51399 case HImode:
51400 case SImode:
51401 case DImode:
51402 case SFmode:
51403 case DFmode:
51404 /* case SCmode: */
51405 /* case DCmode: */
51406 break;
51407 default:
51408 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51409 "unsupported return type %qT for simd\n", ret_type);
51410 return 0;
51413 tree t;
51414 int i;
51416 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51417 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51418 switch (TYPE_MODE (TREE_TYPE (t)))
51420 case QImode:
51421 case HImode:
51422 case SImode:
51423 case DImode:
51424 case SFmode:
51425 case DFmode:
51426 /* case SCmode: */
51427 /* case DCmode: */
51428 break;
51429 default:
51430 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51431 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
51432 return 0;
51435 if (clonei->cilk_elemental)
51437 /* Parse the processor clause here. If not present, default to 'b'. */
51438 clonei->vecsize_mangle = 'b';
51440 else if (!TREE_PUBLIC (node->decl))
51442 /* If the function isn't exported, we can pick up just one ISA
51443 for the clones. */
51444 if (TARGET_AVX2)
51445 clonei->vecsize_mangle = 'd';
51446 else if (TARGET_AVX)
51447 clonei->vecsize_mangle = 'c';
51448 else
51449 clonei->vecsize_mangle = 'b';
51450 ret = 1;
51452 else
51454 clonei->vecsize_mangle = "bcd"[num];
51455 ret = 3;
51457 switch (clonei->vecsize_mangle)
51459 case 'b':
51460 clonei->vecsize_int = 128;
51461 clonei->vecsize_float = 128;
51462 break;
51463 case 'c':
51464 clonei->vecsize_int = 128;
51465 clonei->vecsize_float = 256;
51466 break;
51467 case 'd':
51468 clonei->vecsize_int = 256;
51469 clonei->vecsize_float = 256;
51470 break;
51472 if (clonei->simdlen == 0)
51474 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51475 clonei->simdlen = clonei->vecsize_int;
51476 else
51477 clonei->simdlen = clonei->vecsize_float;
51478 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51479 if (clonei->simdlen > 16)
51480 clonei->simdlen = 16;
51482 return ret;
51485 /* Add target attribute to SIMD clone NODE if needed. */
51487 static void
51488 ix86_simd_clone_adjust (struct cgraph_node *node)
51490 const char *str = NULL;
51491 gcc_assert (node->decl == cfun->decl);
51492 switch (node->simdclone->vecsize_mangle)
51494 case 'b':
51495 if (!TARGET_SSE2)
51496 str = "sse2";
51497 break;
51498 case 'c':
51499 if (!TARGET_AVX)
51500 str = "avx";
51501 break;
51502 case 'd':
51503 if (!TARGET_AVX2)
51504 str = "avx2";
51505 break;
51506 default:
51507 gcc_unreachable ();
51509 if (str == NULL)
51510 return;
51511 push_cfun (NULL);
51512 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51513 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51514 gcc_assert (ok);
51515 pop_cfun ();
51516 ix86_reset_previous_fndecl ();
51517 ix86_set_current_function (node->decl);
51520 /* If SIMD clone NODE can't be used in a vectorized loop
51521 in current function, return -1, otherwise return a badness of using it
51522 (0 if it is most desirable from vecsize_mangle point of view, 1
51523 slightly less desirable, etc.). */
51525 static int
51526 ix86_simd_clone_usable (struct cgraph_node *node)
51528 switch (node->simdclone->vecsize_mangle)
51530 case 'b':
51531 if (!TARGET_SSE2)
51532 return -1;
51533 if (!TARGET_AVX)
51534 return 0;
51535 return TARGET_AVX2 ? 2 : 1;
51536 case 'c':
51537 if (!TARGET_AVX)
51538 return -1;
51539 return TARGET_AVX2 ? 1 : 0;
51540 break;
51541 case 'd':
51542 if (!TARGET_AVX2)
51543 return -1;
51544 return 0;
51545 default:
51546 gcc_unreachable ();
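/* Editor's note -- illustrative sketch, not part of the original source:
   the kind of declaration the three SIMD-clone hooks above act on (built
   with -fopenmp or -fopenmp-simd).  For an exported function all three
   mangles are emitted: 'b' (SSE2, 128 bit), 'c' (AVX, 256-bit float) and
   'd' (AVX2, 256 bit); the function itself is a made-up example.  */
#pragma omp declare simd
float
simd_scale (float x, float factor)
{
  return x * factor;
}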
51550 /* This function adjusts the unroll factor based on
51551 the hardware capabilities. For example, bdver3 has
51552 a loop buffer which makes unrolling of smaller
51553 loops less important. This function decides the
51554 unroll factor using the number of memory references
51555 (value 32 is used) as a heuristic. */
51557 static unsigned
51558 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51560 basic_block *bbs;
51561 rtx_insn *insn;
51562 unsigned i;
51563 unsigned mem_count = 0;
51565 if (!TARGET_ADJUST_UNROLL)
51566 return nunroll;
51568 /* Count the number of memory references within the loop body.
51569 This value determines the unrolling factor for bdver3 and bdver4
51570 architectures. */
51571 subrtx_iterator::array_type array;
51572 bbs = get_loop_body (loop);
51573 for (i = 0; i < loop->num_nodes; i++)
51574 FOR_BB_INSNS (bbs[i], insn)
51575 if (NONDEBUG_INSN_P (insn))
51576 FOR_EACH_SUBRTX (iter, array, insn, NONCONST)
51577 if (const_rtx x = *iter)
51578 if (MEM_P (x))
51580 machine_mode mode = GET_MODE (x);
51581 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
51582 if (n_words > 4)
51583 mem_count += 2;
51584 else
51585 mem_count += 1;
51587 free (bbs);
51589 if (mem_count && mem_count <= 32)
51590 return 32 / mem_count;
51592 return nunroll;
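/* Editor's note -- illustrative worked example, not part of the original
   source: with TARGET_ADJUST_UNROLL (bdver3/bdver4) a loop body containing
   5 word-sized memory references gets an unroll factor of 32 / 5 = 6, while
   a body with more than 32 counted references keeps the generic NUNROLL.  */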
51596 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51598 static bool
51599 ix86_float_exceptions_rounding_supported_p (void)
51601 /* For x87 floating point with standard excess precision handling,
51602 there is no adddf3 pattern (since x87 floating point only has
51603 XFmode operations) so the default hook implementation gets this
51604 wrong. */
51605 return TARGET_80387 || TARGET_SSE_MATH;
51608 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
51610 static void
51611 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
51613 if (!TARGET_80387 && !TARGET_SSE_MATH)
51614 return;
51615 tree exceptions_var = create_tmp_var (integer_type_node);
51616 if (TARGET_80387)
51618 tree fenv_index_type = build_index_type (size_int (6));
51619 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51620 tree fenv_var = create_tmp_var (fenv_type);
51621 mark_addressable (fenv_var);
51622 tree fenv_ptr = build_pointer_type (fenv_type);
51623 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51624 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51625 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51626 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51627 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51628 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51629 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51630 tree hold_fnclex = build_call_expr (fnclex, 0);
51631 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51632 hold_fnclex);
51633 *clear = build_call_expr (fnclex, 0);
51634 tree sw_var = create_tmp_var (short_unsigned_type_node);
51635 tree fnstsw_call = build_call_expr (fnstsw, 0);
51636 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51637 sw_var, fnstsw_call);
51638 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51639 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51640 exceptions_var, exceptions_x87);
51641 *update = build2 (COMPOUND_EXPR, integer_type_node,
51642 sw_mod, update_mod);
51643 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51644 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
51646 if (TARGET_SSE_MATH)
51648 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
51649 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
51650 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
51651 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
51652 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
51653 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
51654 mxcsr_orig_var, stmxcsr_hold_call);
51655 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
51656 mxcsr_orig_var,
51657 build_int_cst (unsigned_type_node, 0x1f80));
51658 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
51659 build_int_cst (unsigned_type_node, 0xffffffc0));
51660 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
51661 mxcsr_mod_var, hold_mod_val);
51662 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51663 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
51664 hold_assign_orig, hold_assign_mod);
51665 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
51666 ldmxcsr_hold_call);
51667 if (*hold)
51668 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
51669 else
51670 *hold = hold_all;
51671 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51672 if (*clear)
51673 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
51674 ldmxcsr_clear_call);
51675 else
51676 *clear = ldmxcsr_clear_call;
51677 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
51678 tree exceptions_sse = fold_convert (integer_type_node,
51679 stxmcsr_update_call);
51680 if (*update)
51682 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
51683 exceptions_var, exceptions_sse);
51684 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
51685 exceptions_var, exceptions_mod);
51686 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
51687 exceptions_assign);
51689 else
51690 *update = build2 (MODIFY_EXPR, integer_type_node,
51691 exceptions_var, exceptions_sse);
51692 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
51693 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51694 ldmxcsr_update_call);
51696 tree atomic_feraiseexcept
51697 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
51698 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
51699 1, exceptions_var);
51700 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51701 atomic_feraiseexcept_call);
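/* Editor's note -- illustrative sketch, not part of the original source:
   the kind of C11 construct this hook serves.  The hold/clear/update
   sequences built above bracket the compare-and-swap retry loop emitted
   for the compound assignment so FP exception flags are raised only for
   the iteration that succeeds; the names below are a made-up example.  */
#include <stdatomic.h>

static _Atomic double running_total;

static void
add_sample (double x)
{
  running_total += x;	/* Expanded using TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  */
}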
51704 /* Return mode to be used for bounds or VOIDmode
51705 if bounds are not supported. */
51707 static enum machine_mode
51708 ix86_mpx_bound_mode ()
51710 /* Do not support pointer checker if MPX
51711 is not enabled. */
51712 if (!TARGET_MPX)
51714 if (flag_check_pointer_bounds)
51715 warning (0, "Pointer Checker requires MPX support on this target."
51716 " Use -mmpx options to enable MPX.");
51717 return VOIDmode;
51720 return BNDmode;
51723 /* Return constant used to statically initialize constant bounds.
51725 This function is used to create special bound values. For now
51726 only INIT bounds and NONE bounds are expected. More special
51727 values may be added later. */
51729 static tree
51730 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
51732 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
51733 : build_zero_cst (pointer_sized_int_node);
51734 tree high = ub ? build_zero_cst (pointer_sized_int_node)
51735 : build_minus_one_cst (pointer_sized_int_node);
51737 /* This function is supposed to be used to create INIT and
51738 NONE bounds only. */
51739 gcc_assert ((lb == 0 && ub == -1)
51740 || (lb == -1 && ub == 0));
51742 return build_complex (NULL, low, high);
51745 /* Generate a list of statements STMTS to initialize pointer bounds
51746 variable VAR with bounds LB and UB. Return the number of generated
51747 statements. */
51749 static int
51750 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
51752 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
51753 tree lhs, modify, var_p;
51755 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
51756 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
51758 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
51759 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
51760 append_to_statement_list (modify, stmts);
51762 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
51763 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
51764 TYPE_SIZE_UNIT (pointer_sized_int_node)));
51765 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
51766 append_to_statement_list (modify, stmts);
51768 return 2;
51771 /* Initialize the GCC target structure. */
51772 #undef TARGET_RETURN_IN_MEMORY
51773 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51775 #undef TARGET_LEGITIMIZE_ADDRESS
51776 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51778 #undef TARGET_ATTRIBUTE_TABLE
51779 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51780 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51781 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51782 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51783 # undef TARGET_MERGE_DECL_ATTRIBUTES
51784 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51785 #endif
51787 #undef TARGET_COMP_TYPE_ATTRIBUTES
51788 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51790 #undef TARGET_INIT_BUILTINS
51791 #define TARGET_INIT_BUILTINS ix86_init_builtins
51792 #undef TARGET_BUILTIN_DECL
51793 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51794 #undef TARGET_EXPAND_BUILTIN
51795 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51797 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51798 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51799 ix86_builtin_vectorized_function
51801 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
51802 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
51804 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
51805 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
51807 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51808 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51810 #undef TARGET_BUILTIN_RECIPROCAL
51811 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
51813 #undef TARGET_ASM_FUNCTION_EPILOGUE
51814 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
51816 #undef TARGET_ENCODE_SECTION_INFO
51817 #ifndef SUBTARGET_ENCODE_SECTION_INFO
51818 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
51819 #else
51820 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
51821 #endif
51823 #undef TARGET_ASM_OPEN_PAREN
51824 #define TARGET_ASM_OPEN_PAREN ""
51825 #undef TARGET_ASM_CLOSE_PAREN
51826 #define TARGET_ASM_CLOSE_PAREN ""
51828 #undef TARGET_ASM_BYTE_OP
51829 #define TARGET_ASM_BYTE_OP ASM_BYTE
51831 #undef TARGET_ASM_ALIGNED_HI_OP
51832 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
51833 #undef TARGET_ASM_ALIGNED_SI_OP
51834 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
51835 #ifdef ASM_QUAD
51836 #undef TARGET_ASM_ALIGNED_DI_OP
51837 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
51838 #endif
51840 #undef TARGET_PROFILE_BEFORE_PROLOGUE
51841 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
51843 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
51844 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
51846 #undef TARGET_ASM_UNALIGNED_HI_OP
51847 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
51848 #undef TARGET_ASM_UNALIGNED_SI_OP
51849 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
51850 #undef TARGET_ASM_UNALIGNED_DI_OP
51851 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
51853 #undef TARGET_PRINT_OPERAND
51854 #define TARGET_PRINT_OPERAND ix86_print_operand
51855 #undef TARGET_PRINT_OPERAND_ADDRESS
51856 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
51857 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
51858 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
51859 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
51860 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
51862 #undef TARGET_SCHED_INIT_GLOBAL
51863 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
51864 #undef TARGET_SCHED_ADJUST_COST
51865 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
51866 #undef TARGET_SCHED_ISSUE_RATE
51867 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
51868 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
51869 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
51870 ia32_multipass_dfa_lookahead
51871 #undef TARGET_SCHED_MACRO_FUSION_P
51872 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
51873 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
51874 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
51876 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
51877 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
51879 #undef TARGET_MEMMODEL_CHECK
51880 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
51882 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
51883 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
51885 #ifdef HAVE_AS_TLS
51886 #undef TARGET_HAVE_TLS
51887 #define TARGET_HAVE_TLS true
51888 #endif
51889 #undef TARGET_CANNOT_FORCE_CONST_MEM
51890 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
51891 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
51892 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
51894 #undef TARGET_DELEGITIMIZE_ADDRESS
51895 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
51897 #undef TARGET_MS_BITFIELD_LAYOUT_P
51898 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
51900 #if TARGET_MACHO
51901 #undef TARGET_BINDS_LOCAL_P
51902 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
51903 #else
51904 #undef TARGET_BINDS_LOCAL_P
51905 #define TARGET_BINDS_LOCAL_P default_binds_local_p_2
51906 #endif
51907 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51908 #undef TARGET_BINDS_LOCAL_P
51909 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
51910 #endif
51912 #undef TARGET_ASM_OUTPUT_MI_THUNK
51913 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
51914 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
51915 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
51917 #undef TARGET_ASM_FILE_START
51918 #define TARGET_ASM_FILE_START x86_file_start
51920 #undef TARGET_OPTION_OVERRIDE
51921 #define TARGET_OPTION_OVERRIDE ix86_option_override
51923 #undef TARGET_REGISTER_MOVE_COST
51924 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
51925 #undef TARGET_MEMORY_MOVE_COST
51926 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
51927 #undef TARGET_RTX_COSTS
51928 #define TARGET_RTX_COSTS ix86_rtx_costs
51929 #undef TARGET_ADDRESS_COST
51930 #define TARGET_ADDRESS_COST ix86_address_cost
51932 #undef TARGET_FIXED_CONDITION_CODE_REGS
51933 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
51934 #undef TARGET_CC_MODES_COMPATIBLE
51935 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG ix86_init_pic_reg
#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  ix86_libgcc_floating_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

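/* Vectorizer cost-model and SIMD configuration hooks.  */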
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_POST_STREAM_IN
#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

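/* Hooks for the mode-switching (optimize_mode_switching) pass.  */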
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

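/* Pointer Bounds Checker (chkp / Intel MPX) hooks.  */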
#undef TARGET_LOAD_BOUNDS_FOR_ARG
#define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds

#undef TARGET_STORE_BOUNDS_FOR_ARG
#define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds

#undef TARGET_LOAD_RETURNED_BOUNDS
#define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds

#undef TARGET_STORE_RETURNED_BOUNDS
#define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds

#undef TARGET_CHKP_BOUND_MODE
#define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode

#undef TARGET_BUILTIN_CHKP_FUNCTION
#define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function

#undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
#define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds

#undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
#define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant

#undef TARGET_CHKP_INITIALIZE_BOUNDS
#define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds

#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options

#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512

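/* TARGET_INITIALIZER (from target-def.h) expands to an aggregate
   initializer that picks up every TARGET_* hook macro defined above,
   producing the i386 back end's target hook vector.  */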
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"