1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "hash-set.h"
26 #include "machmode.h"
27 #include "vec.h"
28 #include "double-int.h"
29 #include "input.h"
30 #include "alias.h"
31 #include "symtab.h"
32 #include "wide-int.h"
33 #include "inchash.h"
34 #include "tree.h"
35 #include "fold-const.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "calls.h"
39 #include "stor-layout.h"
40 #include "varasm.h"
41 #include "tm_p.h"
42 #include "regs.h"
43 #include "hard-reg-set.h"
44 #include "insn-config.h"
45 #include "conditions.h"
46 #include "output.h"
47 #include "insn-codes.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "except.h"
51 #include "function.h"
52 #include "recog.h"
53 #include "hashtab.h"
54 #include "statistics.h"
55 #include "real.h"
56 #include "fixed-value.h"
57 #include "expmed.h"
58 #include "dojump.h"
59 #include "explow.h"
60 #include "emit-rtl.h"
61 #include "stmt.h"
62 #include "expr.h"
63 #include "optabs.h"
64 #include "diagnostic-core.h"
65 #include "toplev.h"
66 #include "predict.h"
67 #include "dominance.h"
68 #include "cfg.h"
69 #include "cfgrtl.h"
70 #include "cfganal.h"
71 #include "lcm.h"
72 #include "cfgbuild.h"
73 #include "cfgcleanup.h"
74 #include "basic-block.h"
75 #include "ggc.h"
76 #include "target.h"
77 #include "target-def.h"
78 #include "common/common-target.h"
79 #include "langhooks.h"
80 #include "reload.h"
81 #include "hash-map.h"
82 #include "is-a.h"
83 #include "plugin-api.h"
84 #include "ipa-ref.h"
85 #include "cgraph.h"
86 #include "hash-table.h"
87 #include "tree-ssa-alias.h"
88 #include "internal-fn.h"
89 #include "gimple-fold.h"
90 #include "tree-eh.h"
91 #include "gimple-expr.h"
92 #include "gimple.h"
93 #include "gimplify.h"
94 #include "cfgloop.h"
95 #include "dwarf2.h"
96 #include "df.h"
97 #include "tm-constrs.h"
98 #include "params.h"
99 #include "cselib.h"
100 #include "debug.h"
101 #include "sched-int.h"
102 #include "sbitmap.h"
103 #include "fibheap.h"
104 #include "opts.h"
105 #include "diagnostic.h"
106 #include "dumpfile.h"
107 #include "tree-pass.h"
108 #include "context.h"
109 #include "pass_manager.h"
110 #include "target-globals.h"
111 #include "tree-vectorizer.h"
112 #include "shrink-wrap.h"
113 #include "builtins.h"
114 #include "rtl-iter.h"
115 #include "tree-iterator.h"
116 #include "tree-chkp.h"
117 #include "rtl-chkp.h"
119 static rtx legitimize_dllimport_symbol (rtx, bool);
120 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
121 static rtx legitimize_pe_coff_symbol (rtx, bool);
123 #ifndef CHECK_STACK_LIMIT
124 #define CHECK_STACK_LIMIT (-1)
125 #endif
127 /* Return index of given mode in mult and division cost tables. */
128 #define MODE_INDEX(mode) \
129 ((mode) == QImode ? 0 \
130 : (mode) == HImode ? 1 \
131 : (mode) == SImode ? 2 \
132 : (mode) == DImode ? 3 \
133 : 4)
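/* Illustrative example: the index returned by MODE_INDEX selects the
   per-mode entry of the 5-element multiply/divide cost arrays in the
   tables below, roughly as in
     cost->mult_init[MODE_INDEX (mode)]  or  cost->divide[MODE_INDEX (mode)]
   where index 4 ("other") covers any remaining modes.  */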
135 /* Processor costs (relative to an add) */
136 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
137 #define COSTS_N_BYTES(N) ((N) * 2)
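/* Worked example: with COSTS_N_INSNS (N) defined as (N) * 4, a 2-byte add
   scores COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so byte-based size
   costs and insn-based speed costs land on a comparable scale.  */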
139 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
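/* Orientation note (assumed semantics): each *_memcpy / *_memset table below
   holds two stringop_algs entries, entry [0] apparently used when generating
   32-bit code and entry [1] when generating 64-bit code; DUMMY_STRINGOP_ALGS
   simply fills the 64-bit slot for processors that never run 64-bit code.  */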
141 static stringop_algs ix86_size_memcpy[2] = {
142 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
143 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
144 static stringop_algs ix86_size_memset[2] = {
145 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
146 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
148 const
149 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
150 COSTS_N_BYTES (2), /* cost of an add instruction */
151 COSTS_N_BYTES (3), /* cost of a lea instruction */
152 COSTS_N_BYTES (2), /* variable shift costs */
153 COSTS_N_BYTES (3), /* constant shift costs */
154 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
155 COSTS_N_BYTES (3), /* HI */
156 COSTS_N_BYTES (3), /* SI */
157 COSTS_N_BYTES (3), /* DI */
158 COSTS_N_BYTES (5)}, /* other */
159 0, /* cost of multiply per each bit set */
160 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
161 COSTS_N_BYTES (3), /* HI */
162 COSTS_N_BYTES (3), /* SI */
163 COSTS_N_BYTES (3), /* DI */
164 COSTS_N_BYTES (5)}, /* other */
165 COSTS_N_BYTES (3), /* cost of movsx */
166 COSTS_N_BYTES (3), /* cost of movzx */
167 0, /* "large" insn */
168 2, /* MOVE_RATIO */
169 2, /* cost for loading QImode using movzbl */
170 {2, 2, 2}, /* cost of loading integer registers
171 in QImode, HImode and SImode.
172 Relative to reg-reg move (2). */
173 {2, 2, 2}, /* cost of storing integer registers */
174 2, /* cost of reg,reg fld/fst */
175 {2, 2, 2}, /* cost of loading fp registers
176 in SFmode, DFmode and XFmode */
177 {2, 2, 2}, /* cost of storing fp registers
178 in SFmode, DFmode and XFmode */
179 3, /* cost of moving MMX register */
180 {3, 3}, /* cost of loading MMX registers
181 in SImode and DImode */
182 {3, 3}, /* cost of storing MMX registers
183 in SImode and DImode */
184 3, /* cost of moving SSE register */
185 {3, 3, 3}, /* cost of loading SSE registers
186 in SImode, DImode and TImode */
187 {3, 3, 3}, /* cost of storing SSE registers
188 in SImode, DImode and TImode */
189 3, /* MMX or SSE register to integer */
190 0, /* size of l1 cache */
191 0, /* size of l2 cache */
192 0, /* size of prefetch block */
193 0, /* number of parallel prefetches */
194 2, /* Branch cost */
195 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
196 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
197 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
198 COSTS_N_BYTES (2), /* cost of FABS instruction. */
199 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
200 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
201 ix86_size_memcpy,
202 ix86_size_memset,
203 1, /* scalar_stmt_cost. */
204 1, /* scalar load_cost. */
205 1, /* scalar_store_cost. */
206 1, /* vec_stmt_cost. */
207 1, /* vec_to_scalar_cost. */
208 1, /* scalar_to_vec_cost. */
209 1, /* vec_align_load_cost. */
210 1, /* vec_unalign_load_cost. */
211 1, /* vec_store_cost. */
212 1, /* cond_taken_branch_cost. */
213 1, /* cond_not_taken_branch_cost. */
};
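/* Note (assumed usage): ix86_size_cost is typically selected, via the global
   ix86_cost pointer, when a function is being optimized for size; the
   per-processor tables that follow are used when optimizing for speed.  */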
216 /* Processor costs (relative to an add) */
217 static stringop_algs i386_memcpy[2] = {
218 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
219 DUMMY_STRINGOP_ALGS};
220 static stringop_algs i386_memset[2] = {
221 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
222 DUMMY_STRINGOP_ALGS};
224 static const
225 struct processor_costs i386_cost = { /* 386 specific costs */
226 COSTS_N_INSNS (1), /* cost of an add instruction */
227 COSTS_N_INSNS (1), /* cost of a lea instruction */
228 COSTS_N_INSNS (3), /* variable shift costs */
229 COSTS_N_INSNS (2), /* constant shift costs */
230 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
231 COSTS_N_INSNS (6), /* HI */
232 COSTS_N_INSNS (6), /* SI */
233 COSTS_N_INSNS (6), /* DI */
234 COSTS_N_INSNS (6)}, /* other */
235 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
236 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
237 COSTS_N_INSNS (23), /* HI */
238 COSTS_N_INSNS (23), /* SI */
239 COSTS_N_INSNS (23), /* DI */
240 COSTS_N_INSNS (23)}, /* other */
241 COSTS_N_INSNS (3), /* cost of movsx */
242 COSTS_N_INSNS (2), /* cost of movzx */
243 15, /* "large" insn */
244 3, /* MOVE_RATIO */
245 4, /* cost for loading QImode using movzbl */
246 {2, 4, 2}, /* cost of loading integer registers
247 in QImode, HImode and SImode.
248 Relative to reg-reg move (2). */
249 {2, 4, 2}, /* cost of storing integer registers */
250 2, /* cost of reg,reg fld/fst */
251 {8, 8, 8}, /* cost of loading fp registers
252 in SFmode, DFmode and XFmode */
253 {8, 8, 8}, /* cost of storing fp registers
254 in SFmode, DFmode and XFmode */
255 2, /* cost of moving MMX register */
256 {4, 8}, /* cost of loading MMX registers
257 in SImode and DImode */
258 {4, 8}, /* cost of storing MMX registers
259 in SImode and DImode */
260 2, /* cost of moving SSE register */
261 {4, 8, 16}, /* cost of loading SSE registers
262 in SImode, DImode and TImode */
263 {4, 8, 16}, /* cost of storing SSE registers
264 in SImode, DImode and TImode */
265 3, /* MMX or SSE register to integer */
266 0, /* size of l1 cache */
267 0, /* size of l2 cache */
268 0, /* size of prefetch block */
269 0, /* number of parallel prefetches */
270 1, /* Branch cost */
271 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
272 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
273 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
274 COSTS_N_INSNS (22), /* cost of FABS instruction. */
275 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
276 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
277 i386_memcpy,
278 i386_memset,
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
};
292 static stringop_algs i486_memcpy[2] = {
293 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
294 DUMMY_STRINGOP_ALGS};
295 static stringop_algs i486_memset[2] = {
296 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
297 DUMMY_STRINGOP_ALGS};
299 static const
300 struct processor_costs i486_cost = { /* 486 specific costs */
301 COSTS_N_INSNS (1), /* cost of an add instruction */
302 COSTS_N_INSNS (1), /* cost of a lea instruction */
303 COSTS_N_INSNS (3), /* variable shift costs */
304 COSTS_N_INSNS (2), /* constant shift costs */
305 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
306 COSTS_N_INSNS (12), /* HI */
307 COSTS_N_INSNS (12), /* SI */
308 COSTS_N_INSNS (12), /* DI */
309 COSTS_N_INSNS (12)}, /* other */
310 1, /* cost of multiply per each bit set */
311 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
312 COSTS_N_INSNS (40), /* HI */
313 COSTS_N_INSNS (40), /* SI */
314 COSTS_N_INSNS (40), /* DI */
315 COSTS_N_INSNS (40)}, /* other */
316 COSTS_N_INSNS (3), /* cost of movsx */
317 COSTS_N_INSNS (2), /* cost of movzx */
318 15, /* "large" insn */
319 3, /* MOVE_RATIO */
320 4, /* cost for loading QImode using movzbl */
321 {2, 4, 2}, /* cost of loading integer registers
322 in QImode, HImode and SImode.
323 Relative to reg-reg move (2). */
324 {2, 4, 2}, /* cost of storing integer registers */
325 2, /* cost of reg,reg fld/fst */
326 {8, 8, 8}, /* cost of loading fp registers
327 in SFmode, DFmode and XFmode */
328 {8, 8, 8}, /* cost of storing fp registers
329 in SFmode, DFmode and XFmode */
330 2, /* cost of moving MMX register */
331 {4, 8}, /* cost of loading MMX registers
332 in SImode and DImode */
333 {4, 8}, /* cost of storing MMX registers
334 in SImode and DImode */
335 2, /* cost of moving SSE register */
336 {4, 8, 16}, /* cost of loading SSE registers
337 in SImode, DImode and TImode */
338 {4, 8, 16}, /* cost of storing SSE registers
339 in SImode, DImode and TImode */
340 3, /* MMX or SSE register to integer */
341 4, /* size of l1 cache. 486 has 8kB cache
342 shared for code and data, so 4kB is
343 not really precise. */
344 4, /* size of l2 cache */
345 0, /* size of prefetch block */
346 0, /* number of parallel prefetches */
347 1, /* Branch cost */
348 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
349 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
350 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
351 COSTS_N_INSNS (3), /* cost of FABS instruction. */
352 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
353 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
354 i486_memcpy,
355 i486_memset,
356 1, /* scalar_stmt_cost. */
357 1, /* scalar load_cost. */
358 1, /* scalar_store_cost. */
359 1, /* vec_stmt_cost. */
360 1, /* vec_to_scalar_cost. */
361 1, /* scalar_to_vec_cost. */
362 1, /* vec_align_load_cost. */
363 2, /* vec_unalign_load_cost. */
364 1, /* vec_store_cost. */
365 3, /* cond_taken_branch_cost. */
366 1, /* cond_not_taken_branch_cost. */
};
369 static stringop_algs pentium_memcpy[2] = {
370 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
371 DUMMY_STRINGOP_ALGS};
372 static stringop_algs pentium_memset[2] = {
373 {libcall, {{-1, rep_prefix_4_byte, false}}},
374 DUMMY_STRINGOP_ALGS};
376 static const
377 struct processor_costs pentium_cost = {
378 COSTS_N_INSNS (1), /* cost of an add instruction */
379 COSTS_N_INSNS (1), /* cost of a lea instruction */
380 COSTS_N_INSNS (4), /* variable shift costs */
381 COSTS_N_INSNS (1), /* constant shift costs */
382 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
383 COSTS_N_INSNS (11), /* HI */
384 COSTS_N_INSNS (11), /* SI */
385 COSTS_N_INSNS (11), /* DI */
386 COSTS_N_INSNS (11)}, /* other */
387 0, /* cost of multiply per each bit set */
388 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
389 COSTS_N_INSNS (25), /* HI */
390 COSTS_N_INSNS (25), /* SI */
391 COSTS_N_INSNS (25), /* DI */
392 COSTS_N_INSNS (25)}, /* other */
393 COSTS_N_INSNS (3), /* cost of movsx */
394 COSTS_N_INSNS (2), /* cost of movzx */
395 8, /* "large" insn */
396 6, /* MOVE_RATIO */
397 6, /* cost for loading QImode using movzbl */
398 {2, 4, 2}, /* cost of loading integer registers
399 in QImode, HImode and SImode.
400 Relative to reg-reg move (2). */
401 {2, 4, 2}, /* cost of storing integer registers */
402 2, /* cost of reg,reg fld/fst */
403 {2, 2, 6}, /* cost of loading fp registers
404 in SFmode, DFmode and XFmode */
405 {4, 4, 6}, /* cost of storing fp registers
406 in SFmode, DFmode and XFmode */
407 8, /* cost of moving MMX register */
408 {8, 8}, /* cost of loading MMX registers
409 in SImode and DImode */
410 {8, 8}, /* cost of storing MMX registers
411 in SImode and DImode */
412 2, /* cost of moving SSE register */
413 {4, 8, 16}, /* cost of loading SSE registers
414 in SImode, DImode and TImode */
415 {4, 8, 16}, /* cost of storing SSE registers
416 in SImode, DImode and TImode */
417 3, /* MMX or SSE register to integer */
418 8, /* size of l1 cache. */
419 8, /* size of l2 cache */
420 0, /* size of prefetch block */
421 0, /* number of parallel prefetches */
422 2, /* Branch cost */
423 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
424 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
425 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
426 COSTS_N_INSNS (1), /* cost of FABS instruction. */
427 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
428 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
429 pentium_memcpy,
430 pentium_memset,
431 1, /* scalar_stmt_cost. */
432 1, /* scalar load_cost. */
433 1, /* scalar_store_cost. */
434 1, /* vec_stmt_cost. */
435 1, /* vec_to_scalar_cost. */
436 1, /* scalar_to_vec_cost. */
437 1, /* vec_align_load_cost. */
438 2, /* vec_unalign_load_cost. */
439 1, /* vec_store_cost. */
440 3, /* cond_taken_branch_cost. */
441 1, /* cond_not_taken_branch_cost. */
};
444 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
445 (we ensure the alignment). For small blocks an inline loop is still a
446 noticeable win; for bigger blocks either rep movsl or rep movsb is the
447 way to go. Rep movsb apparently has a more expensive startup time in the
448 CPU, but after 4K the difference is down in the noise. */
449 static stringop_algs pentiumpro_memcpy[2] = {
450 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
451 {8192, rep_prefix_4_byte, false},
452 {-1, rep_prefix_1_byte, false}}},
453 DUMMY_STRINGOP_ALGS};
454 static stringop_algs pentiumpro_memset[2] = {
455 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
456 {8192, rep_prefix_4_byte, false},
457 {-1, libcall, false}}},
458 DUMMY_STRINGOP_ALGS};
459 static const
460 struct processor_costs pentiumpro_cost = {
461 COSTS_N_INSNS (1), /* cost of an add instruction */
462 COSTS_N_INSNS (1), /* cost of a lea instruction */
463 COSTS_N_INSNS (1), /* variable shift costs */
464 COSTS_N_INSNS (1), /* constant shift costs */
465 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
466 COSTS_N_INSNS (4), /* HI */
467 COSTS_N_INSNS (4), /* SI */
468 COSTS_N_INSNS (4), /* DI */
469 COSTS_N_INSNS (4)}, /* other */
470 0, /* cost of multiply per each bit set */
471 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
472 COSTS_N_INSNS (17), /* HI */
473 COSTS_N_INSNS (17), /* SI */
474 COSTS_N_INSNS (17), /* DI */
475 COSTS_N_INSNS (17)}, /* other */
476 COSTS_N_INSNS (1), /* cost of movsx */
477 COSTS_N_INSNS (1), /* cost of movzx */
478 8, /* "large" insn */
479 6, /* MOVE_RATIO */
480 2, /* cost for loading QImode using movzbl */
481 {4, 4, 4}, /* cost of loading integer registers
482 in QImode, HImode and SImode.
483 Relative to reg-reg move (2). */
484 {2, 2, 2}, /* cost of storing integer registers */
485 2, /* cost of reg,reg fld/fst */
486 {2, 2, 6}, /* cost of loading fp registers
487 in SFmode, DFmode and XFmode */
488 {4, 4, 6}, /* cost of storing fp registers
489 in SFmode, DFmode and XFmode */
490 2, /* cost of moving MMX register */
491 {2, 2}, /* cost of loading MMX registers
492 in SImode and DImode */
493 {2, 2}, /* cost of storing MMX registers
494 in SImode and DImode */
495 2, /* cost of moving SSE register */
496 {2, 2, 8}, /* cost of loading SSE registers
497 in SImode, DImode and TImode */
498 {2, 2, 8}, /* cost of storing SSE registers
499 in SImode, DImode and TImode */
500 3, /* MMX or SSE register to integer */
501 8, /* size of l1 cache. */
502 256, /* size of l2 cache */
503 32, /* size of prefetch block */
504 6, /* number of parallel prefetches */
505 2, /* Branch cost */
506 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
507 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
508 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
509 COSTS_N_INSNS (2), /* cost of FABS instruction. */
510 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
511 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
512 pentiumpro_memcpy,
513 pentiumpro_memset,
514 1, /* scalar_stmt_cost. */
515 1, /* scalar load_cost. */
516 1, /* scalar_store_cost. */
517 1, /* vec_stmt_cost. */
518 1, /* vec_to_scalar_cost. */
519 1, /* scalar_to_vec_cost. */
520 1, /* vec_align_load_cost. */
521 2, /* vec_unalign_load_cost. */
522 1, /* vec_store_cost. */
523 3, /* cond_taken_branch_cost. */
524 1, /* cond_not_taken_branch_cost. */
};
527 static stringop_algs geode_memcpy[2] = {
528 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
529 DUMMY_STRINGOP_ALGS};
530 static stringop_algs geode_memset[2] = {
531 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
532 DUMMY_STRINGOP_ALGS};
533 static const
534 struct processor_costs geode_cost = {
535 COSTS_N_INSNS (1), /* cost of an add instruction */
536 COSTS_N_INSNS (1), /* cost of a lea instruction */
537 COSTS_N_INSNS (2), /* variable shift costs */
538 COSTS_N_INSNS (1), /* constant shift costs */
539 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
540 COSTS_N_INSNS (4), /* HI */
541 COSTS_N_INSNS (7), /* SI */
542 COSTS_N_INSNS (7), /* DI */
543 COSTS_N_INSNS (7)}, /* other */
544 0, /* cost of multiply per each bit set */
545 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
546 COSTS_N_INSNS (23), /* HI */
547 COSTS_N_INSNS (39), /* SI */
548 COSTS_N_INSNS (39), /* DI */
549 COSTS_N_INSNS (39)}, /* other */
550 COSTS_N_INSNS (1), /* cost of movsx */
551 COSTS_N_INSNS (1), /* cost of movzx */
552 8, /* "large" insn */
553 4, /* MOVE_RATIO */
554 1, /* cost for loading QImode using movzbl */
555 {1, 1, 1}, /* cost of loading integer registers
556 in QImode, HImode and SImode.
557 Relative to reg-reg move (2). */
558 {1, 1, 1}, /* cost of storing integer registers */
559 1, /* cost of reg,reg fld/fst */
560 {1, 1, 1}, /* cost of loading fp registers
561 in SFmode, DFmode and XFmode */
562 {4, 6, 6}, /* cost of storing fp registers
563 in SFmode, DFmode and XFmode */
565 1, /* cost of moving MMX register */
566 {1, 1}, /* cost of loading MMX registers
567 in SImode and DImode */
568 {1, 1}, /* cost of storing MMX registers
569 in SImode and DImode */
570 1, /* cost of moving SSE register */
571 {1, 1, 1}, /* cost of loading SSE registers
572 in SImode, DImode and TImode */
573 {1, 1, 1}, /* cost of storing SSE registers
574 in SImode, DImode and TImode */
575 1, /* MMX or SSE register to integer */
576 64, /* size of l1 cache. */
577 128, /* size of l2 cache. */
578 32, /* size of prefetch block */
579 1, /* number of parallel prefetches */
580 1, /* Branch cost */
581 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
582 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
583 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
584 COSTS_N_INSNS (1), /* cost of FABS instruction. */
585 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
586 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
587 geode_memcpy,
588 geode_memset,
589 1, /* scalar_stmt_cost. */
590 1, /* scalar load_cost. */
591 1, /* scalar_store_cost. */
592 1, /* vec_stmt_cost. */
593 1, /* vec_to_scalar_cost. */
594 1, /* scalar_to_vec_cost. */
595 1, /* vec_align_load_cost. */
596 2, /* vec_unalign_load_cost. */
597 1, /* vec_store_cost. */
598 3, /* cond_taken_branch_cost. */
599 1, /* cond_not_taken_branch_cost. */
};
602 static stringop_algs k6_memcpy[2] = {
603 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
604 DUMMY_STRINGOP_ALGS};
605 static stringop_algs k6_memset[2] = {
606 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
607 DUMMY_STRINGOP_ALGS};
608 static const
609 struct processor_costs k6_cost = {
610 COSTS_N_INSNS (1), /* cost of an add instruction */
611 COSTS_N_INSNS (2), /* cost of a lea instruction */
612 COSTS_N_INSNS (1), /* variable shift costs */
613 COSTS_N_INSNS (1), /* constant shift costs */
614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
615 COSTS_N_INSNS (3), /* HI */
616 COSTS_N_INSNS (3), /* SI */
617 COSTS_N_INSNS (3), /* DI */
618 COSTS_N_INSNS (3)}, /* other */
619 0, /* cost of multiply per each bit set */
620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
621 COSTS_N_INSNS (18), /* HI */
622 COSTS_N_INSNS (18), /* SI */
623 COSTS_N_INSNS (18), /* DI */
624 COSTS_N_INSNS (18)}, /* other */
625 COSTS_N_INSNS (2), /* cost of movsx */
626 COSTS_N_INSNS (2), /* cost of movzx */
627 8, /* "large" insn */
628 4, /* MOVE_RATIO */
629 3, /* cost for loading QImode using movzbl */
630 {4, 5, 4}, /* cost of loading integer registers
631 in QImode, HImode and SImode.
632 Relative to reg-reg move (2). */
633 {2, 3, 2}, /* cost of storing integer registers */
634 4, /* cost of reg,reg fld/fst */
635 {6, 6, 6}, /* cost of loading fp registers
636 in SFmode, DFmode and XFmode */
637 {4, 4, 4}, /* cost of storing fp registers
638 in SFmode, DFmode and XFmode */
639 2, /* cost of moving MMX register */
640 {2, 2}, /* cost of loading MMX registers
641 in SImode and DImode */
642 {2, 2}, /* cost of storing MMX registers
643 in SImode and DImode */
644 2, /* cost of moving SSE register */
645 {2, 2, 8}, /* cost of loading SSE registers
646 in SImode, DImode and TImode */
647 {2, 2, 8}, /* cost of storing SSE registers
648 in SImode, DImode and TImode */
649 6, /* MMX or SSE register to integer */
650 32, /* size of l1 cache. */
651 32, /* size of l2 cache. Some models
652 have integrated l2 cache, but
653 optimizing for k6 is not important
654 enough to worry about that. */
655 32, /* size of prefetch block */
656 1, /* number of parallel prefetches */
657 1, /* Branch cost */
658 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
659 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
660 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
661 COSTS_N_INSNS (2), /* cost of FABS instruction. */
662 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
663 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
664 k6_memcpy,
665 k6_memset,
666 1, /* scalar_stmt_cost. */
667 1, /* scalar load_cost. */
668 1, /* scalar_store_cost. */
669 1, /* vec_stmt_cost. */
670 1, /* vec_to_scalar_cost. */
671 1, /* scalar_to_vec_cost. */
672 1, /* vec_align_load_cost. */
673 2, /* vec_unalign_load_cost. */
674 1, /* vec_store_cost. */
675 3, /* cond_taken_branch_cost. */
676 1, /* cond_not_taken_branch_cost. */
};
679 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
680 than K8 does. Alignment becomes important after 8 bytes for memcpy and
681 128 bytes for memset. */
682 static stringop_algs athlon_memcpy[2] = {
683 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
684 DUMMY_STRINGOP_ALGS};
685 static stringop_algs athlon_memset[2] = {
686 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
687 DUMMY_STRINGOP_ALGS};
688 static const
689 struct processor_costs athlon_cost = {
690 COSTS_N_INSNS (1), /* cost of an add instruction */
691 COSTS_N_INSNS (2), /* cost of a lea instruction */
692 COSTS_N_INSNS (1), /* variable shift costs */
693 COSTS_N_INSNS (1), /* constant shift costs */
694 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
695 COSTS_N_INSNS (5), /* HI */
696 COSTS_N_INSNS (5), /* SI */
697 COSTS_N_INSNS (5), /* DI */
698 COSTS_N_INSNS (5)}, /* other */
699 0, /* cost of multiply per each bit set */
700 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
701 COSTS_N_INSNS (26), /* HI */
702 COSTS_N_INSNS (42), /* SI */
703 COSTS_N_INSNS (74), /* DI */
704 COSTS_N_INSNS (74)}, /* other */
705 COSTS_N_INSNS (1), /* cost of movsx */
706 COSTS_N_INSNS (1), /* cost of movzx */
707 8, /* "large" insn */
708 9, /* MOVE_RATIO */
709 4, /* cost for loading QImode using movzbl */
710 {3, 4, 3}, /* cost of loading integer registers
711 in QImode, HImode and SImode.
712 Relative to reg-reg move (2). */
713 {3, 4, 3}, /* cost of storing integer registers */
714 4, /* cost of reg,reg fld/fst */
715 {4, 4, 12}, /* cost of loading fp registers
716 in SFmode, DFmode and XFmode */
717 {6, 6, 8}, /* cost of storing fp registers
718 in SFmode, DFmode and XFmode */
719 2, /* cost of moving MMX register */
720 {4, 4}, /* cost of loading MMX registers
721 in SImode and DImode */
722 {4, 4}, /* cost of storing MMX registers
723 in SImode and DImode */
724 2, /* cost of moving SSE register */
725 {4, 4, 6}, /* cost of loading SSE registers
726 in SImode, DImode and TImode */
727 {4, 4, 5}, /* cost of storing SSE registers
728 in SImode, DImode and TImode */
729 5, /* MMX or SSE register to integer */
730 64, /* size of l1 cache. */
731 256, /* size of l2 cache. */
732 64, /* size of prefetch block */
733 6, /* number of parallel prefetches */
734 5, /* Branch cost */
735 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
736 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
737 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
738 COSTS_N_INSNS (2), /* cost of FABS instruction. */
739 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
740 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
741 athlon_memcpy,
742 athlon_memset,
743 1, /* scalar_stmt_cost. */
744 1, /* scalar load_cost. */
745 1, /* scalar_store_cost. */
746 1, /* vec_stmt_cost. */
747 1, /* vec_to_scalar_cost. */
748 1, /* scalar_to_vec_cost. */
749 1, /* vec_align_load_cost. */
750 2, /* vec_unalign_load_cost. */
751 1, /* vec_store_cost. */
752 3, /* cond_taken_branch_cost. */
753 1, /* cond_not_taken_branch_cost. */
};
756 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
757 small blocks it is better to use a loop. For large blocks, a libcall can
758 do non-temporal accesses and beat inline expansion considerably. */
759 static stringop_algs k8_memcpy[2] = {
760 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
761 {-1, rep_prefix_4_byte, false}}},
762 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
763 {-1, libcall, false}}}};
764 static stringop_algs k8_memset[2] = {
765 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
766 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
767 {libcall, {{48, unrolled_loop, false},
768 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
769 static const
770 struct processor_costs k8_cost = {
771 COSTS_N_INSNS (1), /* cost of an add instruction */
772 COSTS_N_INSNS (2), /* cost of a lea instruction */
773 COSTS_N_INSNS (1), /* variable shift costs */
774 COSTS_N_INSNS (1), /* constant shift costs */
775 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
776 COSTS_N_INSNS (4), /* HI */
777 COSTS_N_INSNS (3), /* SI */
778 COSTS_N_INSNS (4), /* DI */
779 COSTS_N_INSNS (5)}, /* other */
780 0, /* cost of multiply per each bit set */
781 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
782 COSTS_N_INSNS (26), /* HI */
783 COSTS_N_INSNS (42), /* SI */
784 COSTS_N_INSNS (74), /* DI */
785 COSTS_N_INSNS (74)}, /* other */
786 COSTS_N_INSNS (1), /* cost of movsx */
787 COSTS_N_INSNS (1), /* cost of movzx */
788 8, /* "large" insn */
789 9, /* MOVE_RATIO */
790 4, /* cost for loading QImode using movzbl */
791 {3, 4, 3}, /* cost of loading integer registers
792 in QImode, HImode and SImode.
793 Relative to reg-reg move (2). */
794 {3, 4, 3}, /* cost of storing integer registers */
795 4, /* cost of reg,reg fld/fst */
796 {4, 4, 12}, /* cost of loading fp registers
797 in SFmode, DFmode and XFmode */
798 {6, 6, 8}, /* cost of storing fp registers
799 in SFmode, DFmode and XFmode */
800 2, /* cost of moving MMX register */
801 {3, 3}, /* cost of loading MMX registers
802 in SImode and DImode */
803 {4, 4}, /* cost of storing MMX registers
804 in SImode and DImode */
805 2, /* cost of moving SSE register */
806 {4, 3, 6}, /* cost of loading SSE registers
807 in SImode, DImode and TImode */
808 {4, 4, 5}, /* cost of storing SSE registers
809 in SImode, DImode and TImode */
810 5, /* MMX or SSE register to integer */
811 64, /* size of l1 cache. */
812 512, /* size of l2 cache. */
813 64, /* size of prefetch block */
814 /* New AMD processors never drop prefetches; if they cannot be performed
815 immediately, they are queued. We set the number of simultaneous prefetches
816 to a large constant to reflect this (it is probably not a good idea not
817 to limit the number of prefetches at all, as their execution also takes
818 some time). */
819 100, /* number of parallel prefetches */
820 3, /* Branch cost */
821 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
822 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
823 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
824 COSTS_N_INSNS (2), /* cost of FABS instruction. */
825 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
826 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
828 k8_memcpy,
829 k8_memset,
830 4, /* scalar_stmt_cost. */
831 2, /* scalar load_cost. */
832 2, /* scalar_store_cost. */
833 5, /* vec_stmt_cost. */
834 0, /* vec_to_scalar_cost. */
835 2, /* scalar_to_vec_cost. */
836 2, /* vec_align_load_cost. */
837 3, /* vec_unalign_load_cost. */
838 3, /* vec_store_cost. */
839 3, /* cond_taken_branch_cost. */
840 2, /* cond_not_taken_branch_cost. */
};
843 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
844 very small blocks it is better to use a loop. For large blocks, a libcall
845 can do non-temporal accesses and beat inline expansion considerably. */
846 static stringop_algs amdfam10_memcpy[2] = {
847 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
848 {-1, rep_prefix_4_byte, false}}},
849 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
850 {-1, libcall, false}}}};
851 static stringop_algs amdfam10_memset[2] = {
852 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
853 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
854 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
855 {-1, libcall, false}}}};
856 struct processor_costs amdfam10_cost = {
857 COSTS_N_INSNS (1), /* cost of an add instruction */
858 COSTS_N_INSNS (2), /* cost of a lea instruction */
859 COSTS_N_INSNS (1), /* variable shift costs */
860 COSTS_N_INSNS (1), /* constant shift costs */
861 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
862 COSTS_N_INSNS (4), /* HI */
863 COSTS_N_INSNS (3), /* SI */
864 COSTS_N_INSNS (4), /* DI */
865 COSTS_N_INSNS (5)}, /* other */
866 0, /* cost of multiply per each bit set */
867 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
868 COSTS_N_INSNS (35), /* HI */
869 COSTS_N_INSNS (51), /* SI */
870 COSTS_N_INSNS (83), /* DI */
871 COSTS_N_INSNS (83)}, /* other */
872 COSTS_N_INSNS (1), /* cost of movsx */
873 COSTS_N_INSNS (1), /* cost of movzx */
874 8, /* "large" insn */
875 9, /* MOVE_RATIO */
876 4, /* cost for loading QImode using movzbl */
877 {3, 4, 3}, /* cost of loading integer registers
878 in QImode, HImode and SImode.
879 Relative to reg-reg move (2). */
880 {3, 4, 3}, /* cost of storing integer registers */
881 4, /* cost of reg,reg fld/fst */
882 {4, 4, 12}, /* cost of loading fp registers
883 in SFmode, DFmode and XFmode */
884 {6, 6, 8}, /* cost of storing fp registers
885 in SFmode, DFmode and XFmode */
886 2, /* cost of moving MMX register */
887 {3, 3}, /* cost of loading MMX registers
888 in SImode and DImode */
889 {4, 4}, /* cost of storing MMX registers
890 in SImode and DImode */
891 2, /* cost of moving SSE register */
892 {4, 4, 3}, /* cost of loading SSE registers
893 in SImode, DImode and TImode */
894 {4, 4, 5}, /* cost of storing SSE registers
895 in SImode, DImode and TImode */
896 3, /* MMX or SSE register to integer */
897 /* On K8:
898 MOVD reg64, xmmreg Double FSTORE 4
899 MOVD reg32, xmmreg Double FSTORE 4
900 On AMDFAM10:
901 MOVD reg64, xmmreg Double FADD 3
902 1/1 1/1
903 MOVD reg32, xmmreg Double FADD 3
904 1/1 1/1 */
905 64, /* size of l1 cache. */
906 512, /* size of l2 cache. */
907 64, /* size of prefetch block */
908 /* New AMD processors never drop prefetches; if they cannot be performed
909 immediately, they are queued. We set the number of simultaneous prefetches
910 to a large constant to reflect this (it is probably not a good idea not
911 to limit the number of prefetches at all, as their execution also takes
912 some time). */
913 100, /* number of parallel prefetches */
914 2, /* Branch cost */
915 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
916 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
917 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
918 COSTS_N_INSNS (2), /* cost of FABS instruction. */
919 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
920 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
922 amdfam10_memcpy,
923 amdfam10_memset,
924 4, /* scalar_stmt_cost. */
925 2, /* scalar load_cost. */
926 2, /* scalar_store_cost. */
927 6, /* vec_stmt_cost. */
928 0, /* vec_to_scalar_cost. */
929 2, /* scalar_to_vec_cost. */
930 2, /* vec_align_load_cost. */
931 2, /* vec_unalign_load_cost. */
932 2, /* vec_store_cost. */
933 2, /* cond_taken_branch_cost. */
934 1, /* cond_not_taken_branch_cost. */
};
937 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
938 very small blocks it is better to use a loop. For large blocks, a libcall
939 can do non-temporal accesses and beat inline expansion considerably. */
940 static stringop_algs bdver1_memcpy[2] = {
941 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
942 {-1, rep_prefix_4_byte, false}}},
943 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
944 {-1, libcall, false}}}};
945 static stringop_algs bdver1_memset[2] = {
946 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
947 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
948 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
949 {-1, libcall, false}}}};
951 const struct processor_costs bdver1_cost = {
952 COSTS_N_INSNS (1), /* cost of an add instruction */
953 COSTS_N_INSNS (1), /* cost of a lea instruction */
954 COSTS_N_INSNS (1), /* variable shift costs */
955 COSTS_N_INSNS (1), /* constant shift costs */
956 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
957 COSTS_N_INSNS (4), /* HI */
958 COSTS_N_INSNS (4), /* SI */
959 COSTS_N_INSNS (6), /* DI */
960 COSTS_N_INSNS (6)}, /* other */
961 0, /* cost of multiply per each bit set */
962 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
963 COSTS_N_INSNS (35), /* HI */
964 COSTS_N_INSNS (51), /* SI */
965 COSTS_N_INSNS (83), /* DI */
966 COSTS_N_INSNS (83)}, /* other */
967 COSTS_N_INSNS (1), /* cost of movsx */
968 COSTS_N_INSNS (1), /* cost of movzx */
969 8, /* "large" insn */
970 9, /* MOVE_RATIO */
971 4, /* cost for loading QImode using movzbl */
972 {5, 5, 4}, /* cost of loading integer registers
973 in QImode, HImode and SImode.
974 Relative to reg-reg move (2). */
975 {4, 4, 4}, /* cost of storing integer registers */
976 2, /* cost of reg,reg fld/fst */
977 {5, 5, 12}, /* cost of loading fp registers
978 in SFmode, DFmode and XFmode */
979 {4, 4, 8}, /* cost of storing fp registers
980 in SFmode, DFmode and XFmode */
981 2, /* cost of moving MMX register */
982 {4, 4}, /* cost of loading MMX registers
983 in SImode and DImode */
984 {4, 4}, /* cost of storing MMX registers
985 in SImode and DImode */
986 2, /* cost of moving SSE register */
987 {4, 4, 4}, /* cost of loading SSE registers
988 in SImode, DImode and TImode */
989 {4, 4, 4}, /* cost of storing SSE registers
990 in SImode, DImode and TImode */
991 2, /* MMX or SSE register to integer */
992 /* On K8:
993 MOVD reg64, xmmreg Double FSTORE 4
994 MOVD reg32, xmmreg Double FSTORE 4
995 On AMDFAM10:
996 MOVD reg64, xmmreg Double FADD 3
997 1/1 1/1
998 MOVD reg32, xmmreg Double FADD 3
999 1/1 1/1 */
1000 16, /* size of l1 cache. */
1001 2048, /* size of l2 cache. */
1002 64, /* size of prefetch block */
1003 /* New AMD processors never drop prefetches; if they cannot be performed
1004 immediately, they are queued. We set the number of simultaneous prefetches
1005 to a large constant to reflect this (it is probably not a good idea not
1006 to limit the number of prefetches at all, as their execution also takes
1007 some time). */
1008 100, /* number of parallel prefetches */
1009 2, /* Branch cost */
1010 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1011 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1012 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1013 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1014 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1015 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1017 bdver1_memcpy,
1018 bdver1_memset,
1019 6, /* scalar_stmt_cost. */
1020 4, /* scalar load_cost. */
1021 4, /* scalar_store_cost. */
1022 6, /* vec_stmt_cost. */
1023 0, /* vec_to_scalar_cost. */
1024 2, /* scalar_to_vec_cost. */
1025 4, /* vec_align_load_cost. */
1026 4, /* vec_unalign_load_cost. */
1027 4, /* vec_store_cost. */
1028 4, /* cond_taken_branch_cost. */
1029 2, /* cond_not_taken_branch_cost. */
};
1032 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1033 very small blocks it is better to use a loop. For large blocks, a libcall
1034 can do non-temporal accesses and beat inline expansion considerably. */
1036 static stringop_algs bdver2_memcpy[2] = {
1037 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1038 {-1, rep_prefix_4_byte, false}}},
1039 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1040 {-1, libcall, false}}}};
1041 static stringop_algs bdver2_memset[2] = {
1042 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1043 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1044 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1045 {-1, libcall, false}}}};
1047 const struct processor_costs bdver2_cost = {
1048 COSTS_N_INSNS (1), /* cost of an add instruction */
1049 COSTS_N_INSNS (1), /* cost of a lea instruction */
1050 COSTS_N_INSNS (1), /* variable shift costs */
1051 COSTS_N_INSNS (1), /* constant shift costs */
1052 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1053 COSTS_N_INSNS (4), /* HI */
1054 COSTS_N_INSNS (4), /* SI */
1055 COSTS_N_INSNS (6), /* DI */
1056 COSTS_N_INSNS (6)}, /* other */
1057 0, /* cost of multiply per each bit set */
1058 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1059 COSTS_N_INSNS (35), /* HI */
1060 COSTS_N_INSNS (51), /* SI */
1061 COSTS_N_INSNS (83), /* DI */
1062 COSTS_N_INSNS (83)}, /* other */
1063 COSTS_N_INSNS (1), /* cost of movsx */
1064 COSTS_N_INSNS (1), /* cost of movzx */
1065 8, /* "large" insn */
1066 9, /* MOVE_RATIO */
1067 4, /* cost for loading QImode using movzbl */
1068 {5, 5, 4}, /* cost of loading integer registers
1069 in QImode, HImode and SImode.
1070 Relative to reg-reg move (2). */
1071 {4, 4, 4}, /* cost of storing integer registers */
1072 2, /* cost of reg,reg fld/fst */
1073 {5, 5, 12}, /* cost of loading fp registers
1074 in SFmode, DFmode and XFmode */
1075 {4, 4, 8}, /* cost of storing fp registers
1076 in SFmode, DFmode and XFmode */
1077 2, /* cost of moving MMX register */
1078 {4, 4}, /* cost of loading MMX registers
1079 in SImode and DImode */
1080 {4, 4}, /* cost of storing MMX registers
1081 in SImode and DImode */
1082 2, /* cost of moving SSE register */
1083 {4, 4, 4}, /* cost of loading SSE registers
1084 in SImode, DImode and TImode */
1085 {4, 4, 4}, /* cost of storing SSE registers
1086 in SImode, DImode and TImode */
1087 2, /* MMX or SSE register to integer */
1088 /* On K8:
1089 MOVD reg64, xmmreg Double FSTORE 4
1090 MOVD reg32, xmmreg Double FSTORE 4
1091 On AMDFAM10:
1092 MOVD reg64, xmmreg Double FADD 3
1093 1/1 1/1
1094 MOVD reg32, xmmreg Double FADD 3
1095 1/1 1/1 */
1096 16, /* size of l1 cache. */
1097 2048, /* size of l2 cache. */
1098 64, /* size of prefetch block */
1099 /* New AMD processors never drop prefetches; if they cannot be performed
1100 immediately, they are queued. We set the number of simultaneous prefetches
1101 to a large constant to reflect this (it is probably not a good idea not
1102 to limit the number of prefetches at all, as their execution also takes
1103 some time). */
1104 100, /* number of parallel prefetches */
1105 2, /* Branch cost */
1106 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1107 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1108 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1109 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1110 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1111 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1113 bdver2_memcpy,
1114 bdver2_memset,
1115 6, /* scalar_stmt_cost. */
1116 4, /* scalar load_cost. */
1117 4, /* scalar_store_cost. */
1118 6, /* vec_stmt_cost. */
1119 0, /* vec_to_scalar_cost. */
1120 2, /* scalar_to_vec_cost. */
1121 4, /* vec_align_load_cost. */
1122 4, /* vec_unalign_load_cost. */
1123 4, /* vec_store_cost. */
1124 4, /* cond_taken_branch_cost. */
1125 2, /* cond_not_taken_branch_cost. */
};
1129 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1130 very small blocks it is better to use a loop. For large blocks, a libcall
1131 can do non-temporal accesses and beat inline expansion considerably. */
1132 static stringop_algs bdver3_memcpy[2] = {
1133 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1134 {-1, rep_prefix_4_byte, false}}},
1135 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1136 {-1, libcall, false}}}};
1137 static stringop_algs bdver3_memset[2] = {
1138 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1139 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1140 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1141 {-1, libcall, false}}}};
1142 struct processor_costs bdver3_cost = {
1143 COSTS_N_INSNS (1), /* cost of an add instruction */
1144 COSTS_N_INSNS (1), /* cost of a lea instruction */
1145 COSTS_N_INSNS (1), /* variable shift costs */
1146 COSTS_N_INSNS (1), /* constant shift costs */
1147 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1148 COSTS_N_INSNS (4), /* HI */
1149 COSTS_N_INSNS (4), /* SI */
1150 COSTS_N_INSNS (6), /* DI */
1151 COSTS_N_INSNS (6)}, /* other */
1152 0, /* cost of multiply per each bit set */
1153 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1154 COSTS_N_INSNS (35), /* HI */
1155 COSTS_N_INSNS (51), /* SI */
1156 COSTS_N_INSNS (83), /* DI */
1157 COSTS_N_INSNS (83)}, /* other */
1158 COSTS_N_INSNS (1), /* cost of movsx */
1159 COSTS_N_INSNS (1), /* cost of movzx */
1160 8, /* "large" insn */
1161 9, /* MOVE_RATIO */
1162 4, /* cost for loading QImode using movzbl */
1163 {5, 5, 4}, /* cost of loading integer registers
1164 in QImode, HImode and SImode.
1165 Relative to reg-reg move (2). */
1166 {4, 4, 4}, /* cost of storing integer registers */
1167 2, /* cost of reg,reg fld/fst */
1168 {5, 5, 12}, /* cost of loading fp registers
1169 in SFmode, DFmode and XFmode */
1170 {4, 4, 8}, /* cost of storing fp registers
1171 in SFmode, DFmode and XFmode */
1172 2, /* cost of moving MMX register */
1173 {4, 4}, /* cost of loading MMX registers
1174 in SImode and DImode */
1175 {4, 4}, /* cost of storing MMX registers
1176 in SImode and DImode */
1177 2, /* cost of moving SSE register */
1178 {4, 4, 4}, /* cost of loading SSE registers
1179 in SImode, DImode and TImode */
1180 {4, 4, 4}, /* cost of storing SSE registers
1181 in SImode, DImode and TImode */
1182 2, /* MMX or SSE register to integer */
1183 16, /* size of l1 cache. */
1184 2048, /* size of l2 cache. */
1185 64, /* size of prefetch block */
1186 /* New AMD processors never drop prefetches; if they cannot be performed
1187 immediately, they are queued. We set the number of simultaneous prefetches
1188 to a large constant to reflect this (it is probably not a good idea not
1189 to limit the number of prefetches at all, as their execution also takes
1190 some time). */
1191 100, /* number of parallel prefetches */
1192 2, /* Branch cost */
1193 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1194 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1195 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1196 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1197 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1198 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1200 bdver3_memcpy,
1201 bdver3_memset,
1202 6, /* scalar_stmt_cost. */
1203 4, /* scalar load_cost. */
1204 4, /* scalar_store_cost. */
1205 6, /* vec_stmt_cost. */
1206 0, /* vec_to_scalar_cost. */
1207 2, /* scalar_to_vec_cost. */
1208 4, /* vec_align_load_cost. */
1209 4, /* vec_unalign_load_cost. */
1210 4, /* vec_store_cost. */
1211 4, /* cond_taken_branch_cost. */
1212 2, /* cond_not_taken_branch_cost. */
};
1215 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1216 very small blocks it is better to use a loop. For large blocks, a libcall
1217 can do non-temporal accesses and beat inline expansion considerably. */
1218 static stringop_algs bdver4_memcpy[2] = {
1219 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1220 {-1, rep_prefix_4_byte, false}}},
1221 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1222 {-1, libcall, false}}}};
1223 static stringop_algs bdver4_memset[2] = {
1224 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1225 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1226 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1227 {-1, libcall, false}}}};
1228 struct processor_costs bdver4_cost = {
1229 COSTS_N_INSNS (1), /* cost of an add instruction */
1230 COSTS_N_INSNS (1), /* cost of a lea instruction */
1231 COSTS_N_INSNS (1), /* variable shift costs */
1232 COSTS_N_INSNS (1), /* constant shift costs */
1233 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1234 COSTS_N_INSNS (4), /* HI */
1235 COSTS_N_INSNS (4), /* SI */
1236 COSTS_N_INSNS (6), /* DI */
1237 COSTS_N_INSNS (6)}, /* other */
1238 0, /* cost of multiply per each bit set */
1239 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1240 COSTS_N_INSNS (35), /* HI */
1241 COSTS_N_INSNS (51), /* SI */
1242 COSTS_N_INSNS (83), /* DI */
1243 COSTS_N_INSNS (83)}, /* other */
1244 COSTS_N_INSNS (1), /* cost of movsx */
1245 COSTS_N_INSNS (1), /* cost of movzx */
1246 8, /* "large" insn */
1247 9, /* MOVE_RATIO */
1248 4, /* cost for loading QImode using movzbl */
1249 {5, 5, 4}, /* cost of loading integer registers
1250 in QImode, HImode and SImode.
1251 Relative to reg-reg move (2). */
1252 {4, 4, 4}, /* cost of storing integer registers */
1253 2, /* cost of reg,reg fld/fst */
1254 {5, 5, 12}, /* cost of loading fp registers
1255 in SFmode, DFmode and XFmode */
1256 {4, 4, 8}, /* cost of storing fp registers
1257 in SFmode, DFmode and XFmode */
1258 2, /* cost of moving MMX register */
1259 {4, 4}, /* cost of loading MMX registers
1260 in SImode and DImode */
1261 {4, 4}, /* cost of storing MMX registers
1262 in SImode and DImode */
1263 2, /* cost of moving SSE register */
1264 {4, 4, 4}, /* cost of loading SSE registers
1265 in SImode, DImode and TImode */
1266 {4, 4, 4}, /* cost of storing SSE registers
1267 in SImode, DImode and TImode */
1268 2, /* MMX or SSE register to integer */
1269 16, /* size of l1 cache. */
1270 2048, /* size of l2 cache. */
1271 64, /* size of prefetch block */
1272 /* New AMD processors never drop prefetches; if they cannot be performed
1273 immediately, they are queued. We set number of simultaneous prefetches
1274 to a large constant to reflect this (it probably is not a good idea not
1275 to limit number of prefetches at all, as their execution also takes some
1276 time). */
1277 100, /* number of parallel prefetches */
1278 2, /* Branch cost */
1279 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1280 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1281 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1282 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1283 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1284 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1286 bdver4_memcpy,
1287 bdver4_memset,
1288 6, /* scalar_stmt_cost. */
1289 4, /* scalar load_cost. */
1290 4, /* scalar_store_cost. */
1291 6, /* vec_stmt_cost. */
1292 0, /* vec_to_scalar_cost. */
1293 2, /* scalar_to_vec_cost. */
1294 4, /* vec_align_load_cost. */
1295 4, /* vec_unalign_load_cost. */
1296 4, /* vec_store_cost. */
1297 4, /* cond_taken_branch_cost. */
1298 2, /* cond_not_taken_branch_cost. */
};
1301 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1302 very small blocks it is better to use a loop. For large blocks, a libcall
1303 can do non-temporal accesses and beat inline expansion considerably. */
1304 static stringop_algs btver1_memcpy[2] = {
1305 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1306 {-1, rep_prefix_4_byte, false}}},
1307 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1308 {-1, libcall, false}}}};
1309 static stringop_algs btver1_memset[2] = {
1310 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1311 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1312 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1313 {-1, libcall, false}}}};
1314 const struct processor_costs btver1_cost = {
1315 COSTS_N_INSNS (1), /* cost of an add instruction */
1316 COSTS_N_INSNS (2), /* cost of a lea instruction */
1317 COSTS_N_INSNS (1), /* variable shift costs */
1318 COSTS_N_INSNS (1), /* constant shift costs */
1319 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1320 COSTS_N_INSNS (4), /* HI */
1321 COSTS_N_INSNS (3), /* SI */
1322 COSTS_N_INSNS (4), /* DI */
1323 COSTS_N_INSNS (5)}, /* other */
1324 0, /* cost of multiply per each bit set */
1325 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1326 COSTS_N_INSNS (35), /* HI */
1327 COSTS_N_INSNS (51), /* SI */
1328 COSTS_N_INSNS (83), /* DI */
1329 COSTS_N_INSNS (83)}, /* other */
1330 COSTS_N_INSNS (1), /* cost of movsx */
1331 COSTS_N_INSNS (1), /* cost of movzx */
1332 8, /* "large" insn */
1333 9, /* MOVE_RATIO */
1334 4, /* cost for loading QImode using movzbl */
1335 {3, 4, 3}, /* cost of loading integer registers
1336 in QImode, HImode and SImode.
1337 Relative to reg-reg move (2). */
1338 {3, 4, 3}, /* cost of storing integer registers */
1339 4, /* cost of reg,reg fld/fst */
1340 {4, 4, 12}, /* cost of loading fp registers
1341 in SFmode, DFmode and XFmode */
1342 {6, 6, 8}, /* cost of storing fp registers
1343 in SFmode, DFmode and XFmode */
1344 2, /* cost of moving MMX register */
1345 {3, 3}, /* cost of loading MMX registers
1346 in SImode and DImode */
1347 {4, 4}, /* cost of storing MMX registers
1348 in SImode and DImode */
1349 2, /* cost of moving SSE register */
1350 {4, 4, 3}, /* cost of loading SSE registers
1351 in SImode, DImode and TImode */
1352 {4, 4, 5}, /* cost of storing SSE registers
1353 in SImode, DImode and TImode */
1354 3, /* MMX or SSE register to integer */
1355 /* On K8:
1356 MOVD reg64, xmmreg Double FSTORE 4
1357 MOVD reg32, xmmreg Double FSTORE 4
1358 On AMDFAM10:
1359 MOVD reg64, xmmreg Double FADD 3
1360 1/1 1/1
1361 MOVD reg32, xmmreg Double FADD 3
1362 1/1 1/1 */
1363 32, /* size of l1 cache. */
1364 512, /* size of l2 cache. */
1365 64, /* size of prefetch block */
1366 100, /* number of parallel prefetches */
1367 2, /* Branch cost */
1368 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1369 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1370 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1371 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1372 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1373 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1375 btver1_memcpy,
1376 btver1_memset,
1377 4, /* scalar_stmt_cost. */
1378 2, /* scalar load_cost. */
1379 2, /* scalar_store_cost. */
1380 6, /* vec_stmt_cost. */
1381 0, /* vec_to_scalar_cost. */
1382 2, /* scalar_to_vec_cost. */
1383 2, /* vec_align_load_cost. */
1384 2, /* vec_unalign_load_cost. */
1385 2, /* vec_store_cost. */
1386 2, /* cond_taken_branch_cost. */
1387 1, /* cond_not_taken_branch_cost. */
};
1390 static stringop_algs btver2_memcpy[2] = {
1391 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1392 {-1, rep_prefix_4_byte, false}}},
1393 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1394 {-1, libcall, false}}}};
1395 static stringop_algs btver2_memset[2] = {
1396 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1397 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1398 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1399 {-1, libcall, false}}}};
1400 const struct processor_costs btver2_cost = {
1401 COSTS_N_INSNS (1), /* cost of an add instruction */
1402 COSTS_N_INSNS (2), /* cost of a lea instruction */
1403 COSTS_N_INSNS (1), /* variable shift costs */
1404 COSTS_N_INSNS (1), /* constant shift costs */
1405 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1406 COSTS_N_INSNS (4), /* HI */
1407 COSTS_N_INSNS (3), /* SI */
1408 COSTS_N_INSNS (4), /* DI */
1409 COSTS_N_INSNS (5)}, /* other */
1410 0, /* cost of multiply per each bit set */
1411 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1412 COSTS_N_INSNS (35), /* HI */
1413 COSTS_N_INSNS (51), /* SI */
1414 COSTS_N_INSNS (83), /* DI */
1415 COSTS_N_INSNS (83)}, /* other */
1416 COSTS_N_INSNS (1), /* cost of movsx */
1417 COSTS_N_INSNS (1), /* cost of movzx */
1418 8, /* "large" insn */
1419 9, /* MOVE_RATIO */
1420 4, /* cost for loading QImode using movzbl */
1421 {3, 4, 3}, /* cost of loading integer registers
1422 in QImode, HImode and SImode.
1423 Relative to reg-reg move (2). */
1424 {3, 4, 3}, /* cost of storing integer registers */
1425 4, /* cost of reg,reg fld/fst */
1426 {4, 4, 12}, /* cost of loading fp registers
1427 in SFmode, DFmode and XFmode */
1428 {6, 6, 8}, /* cost of storing fp registers
1429 in SFmode, DFmode and XFmode */
1430 2, /* cost of moving MMX register */
1431 {3, 3}, /* cost of loading MMX registers
1432 in SImode and DImode */
1433 {4, 4}, /* cost of storing MMX registers
1434 in SImode and DImode */
1435 2, /* cost of moving SSE register */
1436 {4, 4, 3}, /* cost of loading SSE registers
1437 in SImode, DImode and TImode */
1438 {4, 4, 5}, /* cost of storing SSE registers
1439 in SImode, DImode and TImode */
1440 3, /* MMX or SSE register to integer */
1441 /* On K8:
1442 MOVD reg64, xmmreg Double FSTORE 4
1443 MOVD reg32, xmmreg Double FSTORE 4
1444 On AMDFAM10:
1445 MOVD reg64, xmmreg Double FADD 3
1446 1/1 1/1
1447 MOVD reg32, xmmreg Double FADD 3
1448 1/1 1/1 */
1449 32, /* size of l1 cache. */
1450 2048, /* size of l2 cache. */
1451 64, /* size of prefetch block */
1452 100, /* number of parallel prefetches */
1453 2, /* Branch cost */
1454 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1455 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1456 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1457 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1458 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1459 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1460 btver2_memcpy,
1461 btver2_memset,
1462 4, /* scalar_stmt_cost. */
1463 2, /* scalar load_cost. */
1464 2, /* scalar_store_cost. */
1465 6, /* vec_stmt_cost. */
1466 0, /* vec_to_scalar_cost. */
1467 2, /* scalar_to_vec_cost. */
1468 2, /* vec_align_load_cost. */
1469 2, /* vec_unalign_load_cost. */
1470 2, /* vec_store_cost. */
1471 2, /* cond_taken_branch_cost. */
1472 1, /* cond_not_taken_branch_cost. */
1475 static stringop_algs pentium4_memcpy[2] = {
1476 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1477 DUMMY_STRINGOP_ALGS};
1478 static stringop_algs pentium4_memset[2] = {
1479 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1480 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1481 DUMMY_STRINGOP_ALGS};
1483 static const
1484 struct processor_costs pentium4_cost = {
1485 COSTS_N_INSNS (1), /* cost of an add instruction */
1486 COSTS_N_INSNS (3), /* cost of a lea instruction */
1487 COSTS_N_INSNS (4), /* variable shift costs */
1488 COSTS_N_INSNS (4), /* constant shift costs */
1489 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1490 COSTS_N_INSNS (15), /* HI */
1491 COSTS_N_INSNS (15), /* SI */
1492 COSTS_N_INSNS (15), /* DI */
1493 COSTS_N_INSNS (15)}, /* other */
1494 0, /* cost of multiply per each bit set */
1495 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1496 COSTS_N_INSNS (56), /* HI */
1497 COSTS_N_INSNS (56), /* SI */
1498 COSTS_N_INSNS (56), /* DI */
1499 COSTS_N_INSNS (56)}, /* other */
1500 COSTS_N_INSNS (1), /* cost of movsx */
1501 COSTS_N_INSNS (1), /* cost of movzx */
1502 16, /* "large" insn */
1503 6, /* MOVE_RATIO */
1504 2, /* cost for loading QImode using movzbl */
1505 {4, 5, 4}, /* cost of loading integer registers
1506 in QImode, HImode and SImode.
1507 Relative to reg-reg move (2). */
1508 {2, 3, 2}, /* cost of storing integer registers */
1509 2, /* cost of reg,reg fld/fst */
1510 {2, 2, 6}, /* cost of loading fp registers
1511 in SFmode, DFmode and XFmode */
1512 {4, 4, 6}, /* cost of storing fp registers
1513 in SFmode, DFmode and XFmode */
1514 2, /* cost of moving MMX register */
1515 {2, 2}, /* cost of loading MMX registers
1516 in SImode and DImode */
1517 {2, 2}, /* cost of storing MMX registers
1518 in SImode and DImode */
1519 12, /* cost of moving SSE register */
1520 {12, 12, 12}, /* cost of loading SSE registers
1521 in SImode, DImode and TImode */
1522 {2, 2, 8}, /* cost of storing SSE registers
1523 in SImode, DImode and TImode */
1524 10, /* MMX or SSE register to integer */
1525 8, /* size of l1 cache. */
1526 256, /* size of l2 cache. */
1527 64, /* size of prefetch block */
1528 6, /* number of parallel prefetches */
1529 2, /* Branch cost */
1530 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1531 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1532 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1533 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1534 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1535 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1536 pentium4_memcpy,
1537 pentium4_memset,
1538 1, /* scalar_stmt_cost. */
1539 1, /* scalar load_cost. */
1540 1, /* scalar_store_cost. */
1541 1, /* vec_stmt_cost. */
1542 1, /* vec_to_scalar_cost. */
1543 1, /* scalar_to_vec_cost. */
1544 1, /* vec_align_load_cost. */
1545 2, /* vec_unalign_load_cost. */
1546 1, /* vec_store_cost. */
1547 3, /* cond_taken_branch_cost. */
1548 1, /* cond_not_taken_branch_cost. */
1551 static stringop_algs nocona_memcpy[2] = {
1552 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1553 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1554 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1556 static stringop_algs nocona_memset[2] = {
1557 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1558 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1559 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1560 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1562 static const
1563 struct processor_costs nocona_cost = {
1564 COSTS_N_INSNS (1), /* cost of an add instruction */
1565 COSTS_N_INSNS (1), /* cost of a lea instruction */
1566 COSTS_N_INSNS (1), /* variable shift costs */
1567 COSTS_N_INSNS (1), /* constant shift costs */
1568 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1569 COSTS_N_INSNS (10), /* HI */
1570 COSTS_N_INSNS (10), /* SI */
1571 COSTS_N_INSNS (10), /* DI */
1572 COSTS_N_INSNS (10)}, /* other */
1573 0, /* cost of multiply per each bit set */
1574 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1575 COSTS_N_INSNS (66), /* HI */
1576 COSTS_N_INSNS (66), /* SI */
1577 COSTS_N_INSNS (66), /* DI */
1578 COSTS_N_INSNS (66)}, /* other */
1579 COSTS_N_INSNS (1), /* cost of movsx */
1580 COSTS_N_INSNS (1), /* cost of movzx */
1581 16, /* "large" insn */
1582 17, /* MOVE_RATIO */
1583 4, /* cost for loading QImode using movzbl */
1584 {4, 4, 4}, /* cost of loading integer registers
1585 in QImode, HImode and SImode.
1586 Relative to reg-reg move (2). */
1587 {4, 4, 4}, /* cost of storing integer registers */
1588 3, /* cost of reg,reg fld/fst */
1589 {12, 12, 12}, /* cost of loading fp registers
1590 in SFmode, DFmode and XFmode */
1591 {4, 4, 4}, /* cost of storing fp registers
1592 in SFmode, DFmode and XFmode */
1593 6, /* cost of moving MMX register */
1594 {12, 12}, /* cost of loading MMX registers
1595 in SImode and DImode */
1596 {12, 12}, /* cost of storing MMX registers
1597 in SImode and DImode */
1598 6, /* cost of moving SSE register */
1599 {12, 12, 12}, /* cost of loading SSE registers
1600 in SImode, DImode and TImode */
1601 {12, 12, 12}, /* cost of storing SSE registers
1602 in SImode, DImode and TImode */
1603 8, /* MMX or SSE register to integer */
1604 8, /* size of l1 cache. */
1605 1024, /* size of l2 cache. */
1606 64, /* size of prefetch block */
1607 8, /* number of parallel prefetches */
1608 1, /* Branch cost */
1609 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1610 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1611 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1612 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1613 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1614 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1615 nocona_memcpy,
1616 nocona_memset,
1617 1, /* scalar_stmt_cost. */
1618 1, /* scalar load_cost. */
1619 1, /* scalar_store_cost. */
1620 1, /* vec_stmt_cost. */
1621 1, /* vec_to_scalar_cost. */
1622 1, /* scalar_to_vec_cost. */
1623 1, /* vec_align_load_cost. */
1624 2, /* vec_unalign_load_cost. */
1625 1, /* vec_store_cost. */
1626 3, /* cond_taken_branch_cost. */
1627 1, /* cond_not_taken_branch_cost. */
1630 static stringop_algs atom_memcpy[2] = {
1631 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1632 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1633 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1634 static stringop_algs atom_memset[2] = {
1635 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1636 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1637 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1638 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1639 static const
1640 struct processor_costs atom_cost = {
1641 COSTS_N_INSNS (1), /* cost of an add instruction */
1642 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1643 COSTS_N_INSNS (1), /* variable shift costs */
1644 COSTS_N_INSNS (1), /* constant shift costs */
1645 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1646 COSTS_N_INSNS (4), /* HI */
1647 COSTS_N_INSNS (3), /* SI */
1648 COSTS_N_INSNS (4), /* DI */
1649 COSTS_N_INSNS (2)}, /* other */
1650 0, /* cost of multiply per each bit set */
1651 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1652 COSTS_N_INSNS (26), /* HI */
1653 COSTS_N_INSNS (42), /* SI */
1654 COSTS_N_INSNS (74), /* DI */
1655 COSTS_N_INSNS (74)}, /* other */
1656 COSTS_N_INSNS (1), /* cost of movsx */
1657 COSTS_N_INSNS (1), /* cost of movzx */
1658 8, /* "large" insn */
1659 17, /* MOVE_RATIO */
1660 4, /* cost for loading QImode using movzbl */
1661 {4, 4, 4}, /* cost of loading integer registers
1662 in QImode, HImode and SImode.
1663 Relative to reg-reg move (2). */
1664 {4, 4, 4}, /* cost of storing integer registers */
1665 4, /* cost of reg,reg fld/fst */
1666 {12, 12, 12}, /* cost of loading fp registers
1667 in SFmode, DFmode and XFmode */
1668 {6, 6, 8}, /* cost of storing fp registers
1669 in SFmode, DFmode and XFmode */
1670 2, /* cost of moving MMX register */
1671 {8, 8}, /* cost of loading MMX registers
1672 in SImode and DImode */
1673 {8, 8}, /* cost of storing MMX registers
1674 in SImode and DImode */
1675 2, /* cost of moving SSE register */
1676 {8, 8, 8}, /* cost of loading SSE registers
1677 in SImode, DImode and TImode */
1678 {8, 8, 8}, /* cost of storing SSE registers
1679 in SImode, DImode and TImode */
1680 5, /* MMX or SSE register to integer */
1681 32, /* size of l1 cache. */
1682 256, /* size of l2 cache. */
1683 64, /* size of prefetch block */
1684 6, /* number of parallel prefetches */
1685 3, /* Branch cost */
1686 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1687 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1688 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1689 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1690 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1691 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1692 atom_memcpy,
1693 atom_memset,
1694 1, /* scalar_stmt_cost. */
1695 1, /* scalar load_cost. */
1696 1, /* scalar_store_cost. */
1697 1, /* vec_stmt_cost. */
1698 1, /* vec_to_scalar_cost. */
1699 1, /* scalar_to_vec_cost. */
1700 1, /* vec_align_load_cost. */
1701 2, /* vec_unalign_load_cost. */
1702 1, /* vec_store_cost. */
1703 3, /* cond_taken_branch_cost. */
1704 1, /* cond_not_taken_branch_cost. */
1707 static stringop_algs slm_memcpy[2] = {
1708 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1709 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1710 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1711 static stringop_algs slm_memset[2] = {
1712 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1713 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1714 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1715 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1716 static const
1717 struct processor_costs slm_cost = {
1718 COSTS_N_INSNS (1), /* cost of an add instruction */
1719 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1720 COSTS_N_INSNS (1), /* variable shift costs */
1721 COSTS_N_INSNS (1), /* constant shift costs */
1722 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1723 COSTS_N_INSNS (3), /* HI */
1724 COSTS_N_INSNS (3), /* SI */
1725 COSTS_N_INSNS (4), /* DI */
1726 COSTS_N_INSNS (2)}, /* other */
1727 0, /* cost of multiply per each bit set */
1728 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1729 COSTS_N_INSNS (26), /* HI */
1730 COSTS_N_INSNS (42), /* SI */
1731 COSTS_N_INSNS (74), /* DI */
1732 COSTS_N_INSNS (74)}, /* other */
1733 COSTS_N_INSNS (1), /* cost of movsx */
1734 COSTS_N_INSNS (1), /* cost of movzx */
1735 8, /* "large" insn */
1736 17, /* MOVE_RATIO */
1737 4, /* cost for loading QImode using movzbl */
1738 {4, 4, 4}, /* cost of loading integer registers
1739 in QImode, HImode and SImode.
1740 Relative to reg-reg move (2). */
1741 {4, 4, 4}, /* cost of storing integer registers */
1742 4, /* cost of reg,reg fld/fst */
1743 {12, 12, 12}, /* cost of loading fp registers
1744 in SFmode, DFmode and XFmode */
1745 {6, 6, 8}, /* cost of storing fp registers
1746 in SFmode, DFmode and XFmode */
1747 2, /* cost of moving MMX register */
1748 {8, 8}, /* cost of loading MMX registers
1749 in SImode and DImode */
1750 {8, 8}, /* cost of storing MMX registers
1751 in SImode and DImode */
1752 2, /* cost of moving SSE register */
1753 {8, 8, 8}, /* cost of loading SSE registers
1754 in SImode, DImode and TImode */
1755 {8, 8, 8}, /* cost of storing SSE registers
1756 in SImode, DImode and TImode */
1757 5, /* MMX or SSE register to integer */
1758 32, /* size of l1 cache. */
1759 256, /* size of l2 cache. */
1760 64, /* size of prefetch block */
1761 6, /* number of parallel prefetches */
1762 3, /* Branch cost */
1763 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1764 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1765 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1766 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1767 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1768 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1769 slm_memcpy,
1770 slm_memset,
1771 1, /* scalar_stmt_cost. */
1772 1, /* scalar load_cost. */
1773 1, /* scalar_store_cost. */
1774 1, /* vec_stmt_cost. */
1775 4, /* vec_to_scalar_cost. */
1776 1, /* scalar_to_vec_cost. */
1777 1, /* vec_align_load_cost. */
1778 2, /* vec_unalign_load_cost. */
1779 1, /* vec_store_cost. */
1780 3, /* cond_taken_branch_cost. */
1781 1, /* cond_not_taken_branch_cost. */
1784 static stringop_algs intel_memcpy[2] = {
1785 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1786 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1787 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1788 static stringop_algs intel_memset[2] = {
1789 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1790 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1791 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1792 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1793 static const
1794 struct processor_costs intel_cost = {
1795 COSTS_N_INSNS (1), /* cost of an add instruction */
1796 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1797 COSTS_N_INSNS (1), /* variable shift costs */
1798 COSTS_N_INSNS (1), /* constant shift costs */
1799 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1800 COSTS_N_INSNS (3), /* HI */
1801 COSTS_N_INSNS (3), /* SI */
1802 COSTS_N_INSNS (4), /* DI */
1803 COSTS_N_INSNS (2)}, /* other */
1804 0, /* cost of multiply per each bit set */
1805 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1806 COSTS_N_INSNS (26), /* HI */
1807 COSTS_N_INSNS (42), /* SI */
1808 COSTS_N_INSNS (74), /* DI */
1809 COSTS_N_INSNS (74)}, /* other */
1810 COSTS_N_INSNS (1), /* cost of movsx */
1811 COSTS_N_INSNS (1), /* cost of movzx */
1812 8, /* "large" insn */
1813 17, /* MOVE_RATIO */
1814 4, /* cost for loading QImode using movzbl */
1815 {4, 4, 4}, /* cost of loading integer registers
1816 in QImode, HImode and SImode.
1817 Relative to reg-reg move (2). */
1818 {4, 4, 4}, /* cost of storing integer registers */
1819 4, /* cost of reg,reg fld/fst */
1820 {12, 12, 12}, /* cost of loading fp registers
1821 in SFmode, DFmode and XFmode */
1822 {6, 6, 8}, /* cost of storing fp registers
1823 in SFmode, DFmode and XFmode */
1824 2, /* cost of moving MMX register */
1825 {8, 8}, /* cost of loading MMX registers
1826 in SImode and DImode */
1827 {8, 8}, /* cost of storing MMX registers
1828 in SImode and DImode */
1829 2, /* cost of moving SSE register */
1830 {8, 8, 8}, /* cost of loading SSE registers
1831 in SImode, DImode and TImode */
1832 {8, 8, 8}, /* cost of storing SSE registers
1833 in SImode, DImode and TImode */
1834 5, /* MMX or SSE register to integer */
1835 32, /* size of l1 cache. */
1836 256, /* size of l2 cache. */
1837 64, /* size of prefetch block */
1838 6, /* number of parallel prefetches */
1839 3, /* Branch cost */
1840 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1841 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1842 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1843 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1844 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1845 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1846 intel_memcpy,
1847 intel_memset,
1848 1, /* scalar_stmt_cost. */
1849 1, /* scalar load_cost. */
1850 1, /* scalar_store_cost. */
1851 1, /* vec_stmt_cost. */
1852 4, /* vec_to_scalar_cost. */
1853 1, /* scalar_to_vec_cost. */
1854 1, /* vec_align_load_cost. */
1855 2, /* vec_unalign_load_cost. */
1856 1, /* vec_store_cost. */
1857 3, /* cond_taken_branch_cost. */
1858 1, /* cond_not_taken_branch_cost. */
1861 /* Generic should produce code tuned for Core-i7 (and newer chips)
1862 and btver1 (and newer chips). */
1864 static stringop_algs generic_memcpy[2] = {
1865 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1866 {-1, libcall, false}}},
1867 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1868 {-1, libcall, false}}}};
1869 static stringop_algs generic_memset[2] = {
1870 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1871 {-1, libcall, false}}},
1872 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1873 {-1, libcall, false}}}};
1874 static const
1875 struct processor_costs generic_cost = {
1876 COSTS_N_INSNS (1), /* cost of an add instruction */
1877 /* On all chips taken into consideration, lea takes 2 cycles or more.
1878 With this cost, however, our current implementation of synth_mult uses
1879 unnecessary temporary registers, causing regressions on several SPECfp
1880 benchmarks. */
1881 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1882 COSTS_N_INSNS (1), /* variable shift costs */
1883 COSTS_N_INSNS (1), /* constant shift costs */
1884 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1885 COSTS_N_INSNS (4), /* HI */
1886 COSTS_N_INSNS (3), /* SI */
1887 COSTS_N_INSNS (4), /* DI */
1888 COSTS_N_INSNS (2)}, /* other */
1889 0, /* cost of multiply per each bit set */
1890 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1891 COSTS_N_INSNS (26), /* HI */
1892 COSTS_N_INSNS (42), /* SI */
1893 COSTS_N_INSNS (74), /* DI */
1894 COSTS_N_INSNS (74)}, /* other */
1895 COSTS_N_INSNS (1), /* cost of movsx */
1896 COSTS_N_INSNS (1), /* cost of movzx */
1897 8, /* "large" insn */
1898 17, /* MOVE_RATIO */
1899 4, /* cost for loading QImode using movzbl */
1900 {4, 4, 4}, /* cost of loading integer registers
1901 in QImode, HImode and SImode.
1902 Relative to reg-reg move (2). */
1903 {4, 4, 4}, /* cost of storing integer registers */
1904 4, /* cost of reg,reg fld/fst */
1905 {12, 12, 12}, /* cost of loading fp registers
1906 in SFmode, DFmode and XFmode */
1907 {6, 6, 8}, /* cost of storing fp registers
1908 in SFmode, DFmode and XFmode */
1909 2, /* cost of moving MMX register */
1910 {8, 8}, /* cost of loading MMX registers
1911 in SImode and DImode */
1912 {8, 8}, /* cost of storing MMX registers
1913 in SImode and DImode */
1914 2, /* cost of moving SSE register */
1915 {8, 8, 8}, /* cost of loading SSE registers
1916 in SImode, DImode and TImode */
1917 {8, 8, 8}, /* cost of storing SSE registers
1918 in SImode, DImode and TImode */
1919 5, /* MMX or SSE register to integer */
1920 32, /* size of l1 cache. */
1921 512, /* size of l2 cache. */
1922 64, /* size of prefetch block */
1923 6, /* number of parallel prefetches */
1924 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1925 value is increased to the perhaps more appropriate value of 5. */
1926 3, /* Branch cost */
1927 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1928 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1929 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1930 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1931 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1932 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1933 generic_memcpy,
1934 generic_memset,
1935 1, /* scalar_stmt_cost. */
1936 1, /* scalar load_cost. */
1937 1, /* scalar_store_cost. */
1938 1, /* vec_stmt_cost. */
1939 1, /* vec_to_scalar_cost. */
1940 1, /* scalar_to_vec_cost. */
1941 1, /* vec_align_load_cost. */
1942 2, /* vec_unalign_load_cost. */
1943 1, /* vec_store_cost. */
1944 3, /* cond_taken_branch_cost. */
1945 1, /* cond_not_taken_branch_cost. */
1948 /* core_cost should produce code tuned for the Core family of CPUs. */
1949 static stringop_algs core_memcpy[2] = {
1950 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1951 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1952 {-1, libcall, false}}}};
1953 static stringop_algs core_memset[2] = {
1954 {libcall, {{6, loop_1_byte, true},
1955 {24, loop, true},
1956 {8192, rep_prefix_4_byte, true},
1957 {-1, libcall, false}}},
1958 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1959 {-1, libcall, false}}}};
1961 static const
1962 struct processor_costs core_cost = {
1963 COSTS_N_INSNS (1), /* cost of an add instruction */
1964 /* On all chips taken into consideration, lea takes 2 cycles or more.
1965 With this cost, however, our current implementation of synth_mult uses
1966 unnecessary temporary registers, causing regressions on several SPECfp
1967 benchmarks. */
1968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1969 COSTS_N_INSNS (1), /* variable shift costs */
1970 COSTS_N_INSNS (1), /* constant shift costs */
1971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1972 COSTS_N_INSNS (4), /* HI */
1973 COSTS_N_INSNS (3), /* SI */
1974 COSTS_N_INSNS (4), /* DI */
1975 COSTS_N_INSNS (2)}, /* other */
1976 0, /* cost of multiply per each bit set */
1977 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1978 COSTS_N_INSNS (26), /* HI */
1979 COSTS_N_INSNS (42), /* SI */
1980 COSTS_N_INSNS (74), /* DI */
1981 COSTS_N_INSNS (74)}, /* other */
1982 COSTS_N_INSNS (1), /* cost of movsx */
1983 COSTS_N_INSNS (1), /* cost of movzx */
1984 8, /* "large" insn */
1985 17, /* MOVE_RATIO */
1986 4, /* cost for loading QImode using movzbl */
1987 {4, 4, 4}, /* cost of loading integer registers
1988 in QImode, HImode and SImode.
1989 Relative to reg-reg move (2). */
1990 {4, 4, 4}, /* cost of storing integer registers */
1991 4, /* cost of reg,reg fld/fst */
1992 {12, 12, 12}, /* cost of loading fp registers
1993 in SFmode, DFmode and XFmode */
1994 {6, 6, 8}, /* cost of storing fp registers
1995 in SFmode, DFmode and XFmode */
1996 2, /* cost of moving MMX register */
1997 {8, 8}, /* cost of loading MMX registers
1998 in SImode and DImode */
1999 {8, 8}, /* cost of storing MMX registers
2000 in SImode and DImode */
2001 2, /* cost of moving SSE register */
2002 {8, 8, 8}, /* cost of loading SSE registers
2003 in SImode, DImode and TImode */
2004 {8, 8, 8}, /* cost of storing SSE registers
2005 in SImode, DImode and TImode */
2006 5, /* MMX or SSE register to integer */
2007 64, /* size of l1 cache. */
2008 512, /* size of l2 cache. */
2009 64, /* size of prefetch block */
2010 6, /* number of parallel prefetches */
2011 /* FIXME perhaps more appropriate value is 5. */
2012 3, /* Branch cost */
2013 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2014 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2015 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2016 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2017 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2018 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2019 core_memcpy,
2020 core_memset,
2021 1, /* scalar_stmt_cost. */
2022 1, /* scalar load_cost. */
2023 1, /* scalar_store_cost. */
2024 1, /* vec_stmt_cost. */
2025 1, /* vec_to_scalar_cost. */
2026 1, /* scalar_to_vec_cost. */
2027 1, /* vec_align_load_cost. */
2028 2, /* vec_unalign_load_cost. */
2029 1, /* vec_store_cost. */
2030 3, /* cond_taken_branch_cost. */
2031 1, /* cond_not_taken_branch_cost. */
2035 /* Set by -mtune. */
2036 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2038 /* Set by -mtune or -Os. */
2039 const struct processor_costs *ix86_cost = &pentium_cost;
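/* A rough sketch of how these defaults are replaced once the tuning target is
   known (the exact assignments live in the option-override code later in this
   file; ix86_size_cost is assumed to be the size-oriented table defined
   earlier):

     ix86_tune_cost = processor_target_table[ix86_tune].cost;
     ix86_cost = optimize_size ? &ix86_size_cost : ix86_tune_cost;  */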
2041 /* Processor feature/optimization bitmasks. */
2042 #define m_386 (1<<PROCESSOR_I386)
2043 #define m_486 (1<<PROCESSOR_I486)
2044 #define m_PENT (1<<PROCESSOR_PENTIUM)
2045 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2046 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2047 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2048 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2049 #define m_CORE2 (1<<PROCESSOR_CORE2)
2050 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2051 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2052 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2053 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2054 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2055 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2056 #define m_KNL (1<<PROCESSOR_KNL)
2057 #define m_INTEL (1<<PROCESSOR_INTEL)
2059 #define m_GEODE (1<<PROCESSOR_GEODE)
2060 #define m_K6 (1<<PROCESSOR_K6)
2061 #define m_K6_GEODE (m_K6 | m_GEODE)
2062 #define m_K8 (1<<PROCESSOR_K8)
2063 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2064 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2065 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2066 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2067 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2068 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2069 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2070 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2071 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2072 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2073 #define m_BTVER (m_BTVER1 | m_BTVER2)
2074 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2076 #define m_GENERIC (1<<PROCESSOR_GENERIC)
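/* Each selector in x86-tune.def is a bitwise OR of the masks above; a feature
   is enabled for the current tuning when the selector has the bit of the
   selected processor set. A minimal sketch of the test performed by
   set_ix86_tune_features further below:

     unsigned int ix86_tune_mask = 1u << ix86_tune;
     bool enabled = (initial_ix86_tune_features[i] & ix86_tune_mask) != 0;

   so a selector such as (m_CORE_ALL | m_GENERIC) turns the feature on only
   for the Core variants and the generic tuning.  */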
2078 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2079 #undef DEF_TUNE
2080 #define DEF_TUNE(tune, name, selector) name,
2081 #include "x86-tune.def"
2082 #undef DEF_TUNE
2085 /* Feature tests against the various tunings. */
2086 unsigned char ix86_tune_features[X86_TUNE_LAST];
2088 /* Feature tests against the various tunings used to create ix86_tune_features
2089 based on the processor mask. */
2090 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2091 #undef DEF_TUNE
2092 #define DEF_TUNE(tune, name, selector) selector,
2093 #include "x86-tune.def"
2094 #undef DEF_TUNE
2097 /* Feature tests against the various architecture variations. */
2098 unsigned char ix86_arch_features[X86_ARCH_LAST];
2100 /* Feature tests against the various architecture variations, used to create
2101 ix86_arch_features based on the processor mask. */
2102 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2103 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2104 ~(m_386 | m_486 | m_PENT | m_K6),
2106 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2107 ~m_386,
2109 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2110 ~(m_386 | m_486),
2112 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2113 ~m_386,
2115 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2116 ~m_386,
2119 /* In case the average insn count for single function invocation is
2120 lower than this constant, emit fast (but longer) prologue and
2121 epilogue code. */
2122 #define FAST_PROLOGUE_INSN_COUNT 20
2124 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2125 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2126 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2127 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2129 /* Array of the smallest class containing reg number REGNO, indexed by
2130 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2132 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2134 /* ax, dx, cx, bx */
2135 AREG, DREG, CREG, BREG,
2136 /* si, di, bp, sp */
2137 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2138 /* FP registers */
2139 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2140 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2141 /* arg pointer */
2142 NON_Q_REGS,
2143 /* flags, fpsr, fpcr, frame */
2144 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2145 /* SSE registers */
2146 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2147 SSE_REGS, SSE_REGS,
2148 /* MMX registers */
2149 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2150 MMX_REGS, MMX_REGS,
2151 /* REX registers */
2152 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2153 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2154 /* SSE REX registers */
2155 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2156 SSE_REGS, SSE_REGS,
2157 /* AVX-512 SSE registers */
2158 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2159 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2160 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2161 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2162 /* Mask registers. */
2163 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2164 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2165 /* MPX bound registers */
2166 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2169 /* The "default" register map used in 32bit mode. */
2171 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2173 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2174 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2175 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2176 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2177 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2178 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2179 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2180 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2181 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2182 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2183 101, 102, 103, 104, /* bound registers */
2186 /* The "default" register map used in 64bit mode. */
2188 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2190 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2191 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2192 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2193 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2194 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2195 8,9,10,11,12,13,14,15, /* extended integer registers */
2196 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2197 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2198 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2199 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2200 126, 127, 128, 129, /* bound registers */
2203 /* Define the register numbers to be used in Dwarf debugging information.
2204 The SVR4 reference port C compiler uses the following register numbers
2205 in its Dwarf output code:
2206 0 for %eax (gcc regno = 0)
2207 1 for %ecx (gcc regno = 2)
2208 2 for %edx (gcc regno = 1)
2209 3 for %ebx (gcc regno = 3)
2210 4 for %esp (gcc regno = 7)
2211 5 for %ebp (gcc regno = 6)
2212 6 for %esi (gcc regno = 4)
2213 7 for %edi (gcc regno = 5)
2214 The following three DWARF register numbers are never generated by
2215 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2216 believes these numbers have these meanings.
2217 8 for %eip (no gcc equivalent)
2218 9 for %eflags (gcc regno = 17)
2219 10 for %trapno (no gcc equivalent)
2220 It is not at all clear how we should number the FP stack registers
2221 for the x86 architecture. If the version of SDB on x86/svr4 were
2222 a bit less brain dead with respect to floating-point then we would
2223 have a precedent to follow with respect to DWARF register numbers
2224 for x86 FP registers, but the SDB on x86/svr4 is so completely
2225 broken with respect to FP registers that it is hardly worth thinking
2226 of it as something to strive for compatibility with.
2227 The version of x86/svr4 SDB I have at the moment does (partially)
2228 seem to believe that DWARF register number 11 is associated with
2229 the x86 register %st(0), but that's about all. Higher DWARF
2230 register numbers don't seem to be associated with anything in
2231 particular, and even for DWARF regno 11, SDB only seems to under-
2232 stand that it should say that a variable lives in %st(0) (when
2233 asked via an `=' command) if we said it was in DWARF regno 11,
2234 but SDB still prints garbage when asked for the value of the
2235 variable in question (via a `/' command).
2236 (Also note that the labels SDB prints for various FP stack regs
2237 when doing an `x' command are all wrong.)
2238 Note that these problems generally don't affect the native SVR4
2239 C compiler because it doesn't allow the use of -O with -g and
2240 because when it is *not* optimizing, it allocates a memory
2241 location for each floating-point variable, and the memory
2242 location is what gets described in the DWARF AT_location
2243 attribute for the variable in question.
2244 Regardless of the severe mental illness of the x86/svr4 SDB, we
2245 do something sensible here and we use the following DWARF
2246 register numbers. Note that these are all stack-top-relative
2247 numbers.
2248 11 for %st(0) (gcc regno = 8)
2249 12 for %st(1) (gcc regno = 9)
2250 13 for %st(2) (gcc regno = 10)
2251 14 for %st(3) (gcc regno = 11)
2252 15 for %st(4) (gcc regno = 12)
2253 16 for %st(5) (gcc regno = 13)
2254 17 for %st(6) (gcc regno = 14)
2255 18 for %st(7) (gcc regno = 15)
2257 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2259 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2260 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2261 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2262 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2263 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2264 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2265 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2267 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2268 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2269 101, 102, 103, 104, /* bound registers */
2272 /* Define parameter passing and return registers. */
2274 static int const x86_64_int_parameter_registers[6] =
2276 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2279 static int const x86_64_ms_abi_int_parameter_registers[4] =
2281 CX_REG, DX_REG, R8_REG, R9_REG
2284 static int const x86_64_int_return_registers[4] =
2286 AX_REG, DX_REG, DI_REG, SI_REG
2289 /* Additional registers that are clobbered by SYSV calls. */
2291 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2293 SI_REG, DI_REG,
2294 XMM6_REG, XMM7_REG,
2295 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2296 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2299 /* Define the structure for the machine field in struct function. */
2301 struct GTY(()) stack_local_entry {
2302 unsigned short mode;
2303 unsigned short n;
2304 rtx rtl;
2305 struct stack_local_entry *next;
2308 /* Structure describing stack frame layout.
2309 Stack grows downward:
2311 [arguments]
2312 <- ARG_POINTER
2313 saved pc
2315 saved static chain if ix86_static_chain_on_stack
2317 saved frame pointer if frame_pointer_needed
2318 <- HARD_FRAME_POINTER
2319 [saved regs]
2320 <- regs_save_offset
2321 [padding0]
2323 [saved SSE regs]
2324 <- sse_regs_save_offset
2325 [padding1] |
2326 | <- FRAME_POINTER
2327 [va_arg registers] |
2329 [frame] |
2331 [padding2] | = to_allocate
2332 <- STACK_POINTER
2334 struct ix86_frame
2336 int nsseregs;
2337 int nregs;
2338 int va_arg_size;
2339 int red_zone_size;
2340 int outgoing_arguments_size;
2342 /* The offsets relative to ARG_POINTER. */
2343 HOST_WIDE_INT frame_pointer_offset;
2344 HOST_WIDE_INT hard_frame_pointer_offset;
2345 HOST_WIDE_INT stack_pointer_offset;
2346 HOST_WIDE_INT hfp_save_offset;
2347 HOST_WIDE_INT reg_save_offset;
2348 HOST_WIDE_INT sse_reg_save_offset;
2350 /* When save_regs_using_mov is set, emit prologue using
2351 move instead of push instructions. */
2352 bool save_regs_using_mov;
2355 /* Which CPU we are scheduling for. */
2356 enum attr_cpu ix86_schedule;
2358 /* Which CPU we are optimizing for. */
2359 enum processor_type ix86_tune;
2361 /* Which instruction set architecture to use. */
2362 enum processor_type ix86_arch;
2364 /* True if processor has SSE prefetch instruction. */
2365 unsigned char x86_prefetch_sse;
2367 /* -mstackrealign option */
2368 static const char ix86_force_align_arg_pointer_string[]
2369 = "force_align_arg_pointer";
2371 static rtx (*ix86_gen_leave) (void);
2372 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2373 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2374 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2375 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2376 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2377 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2378 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2379 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2380 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2381 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2382 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2384 /* Preferred alignment for stack boundary in bits. */
2385 unsigned int ix86_preferred_stack_boundary;
2387 /* Alignment for incoming stack boundary in bits specified at
2388 command line. */
2389 static unsigned int ix86_user_incoming_stack_boundary;
2391 /* Default alignment for incoming stack boundary in bits. */
2392 static unsigned int ix86_default_incoming_stack_boundary;
2394 /* Alignment for incoming stack boundary in bits. */
2395 unsigned int ix86_incoming_stack_boundary;
2397 /* Calling-ABI-specific va_list type nodes. */
2398 static GTY(()) tree sysv_va_list_type_node;
2399 static GTY(()) tree ms_va_list_type_node;
2401 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2402 char internal_label_prefix[16];
2403 int internal_label_prefix_len;
2405 /* Fence to use after loop using movnt. */
2406 tree x86_mfence;
2408 /* Register class used for passing a given 64-bit part of the argument.
2409 These represent the classes documented by the psABI, with the exception of
2410 the SSESF and SSEDF classes, which are basically the SSE class; GCC just
2411 uses an SFmode or DFmode move instead of DImode to avoid reformatting
2413 penalties. Similarly we play games with INTEGERSI_CLASS to use cheaper
2414 SImode moves whenever possible (the upper half then contains only padding). */
2415 enum x86_64_reg_class
2417 X86_64_NO_CLASS,
2418 X86_64_INTEGER_CLASS,
2419 X86_64_INTEGERSI_CLASS,
2420 X86_64_SSE_CLASS,
2421 X86_64_SSESF_CLASS,
2422 X86_64_SSEDF_CLASS,
2423 X86_64_SSEUP_CLASS,
2424 X86_64_X87_CLASS,
2425 X86_64_X87UP_CLASS,
2426 X86_64_COMPLEX_X87_CLASS,
2427 X86_64_MEMORY_CLASS
2430 #define MAX_CLASSES 8
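/* Illustrative classification example (an assumption based on the SysV x86-64
   psABI rather than on code in this file): an argument of type

     struct { double d; long l; };

   occupies two eightbytes; the first is classified X86_64_SSEDF_CLASS and the
   second X86_64_INTEGER_CLASS, so d is passed in an SSE register and l in a
   general-purpose register.  */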
2432 /* Table of constants used by fldpi, fldln2, etc.... */
2433 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2434 static bool ext_80387_constants_init = 0;
2437 static struct machine_function * ix86_init_machine_status (void);
2438 static rtx ix86_function_value (const_tree, const_tree, bool);
2439 static bool ix86_function_value_regno_p (const unsigned int);
2440 static unsigned int ix86_function_arg_boundary (machine_mode,
2441 const_tree);
2442 static rtx ix86_static_chain (const_tree, bool);
2443 static int ix86_function_regparm (const_tree, const_tree);
2444 static void ix86_compute_frame_layout (struct ix86_frame *);
2445 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2446 rtx, rtx, int);
2447 static void ix86_add_new_builtins (HOST_WIDE_INT);
2448 static tree ix86_canonical_va_list_type (tree);
2449 static void predict_jump (int);
2450 static unsigned int split_stack_prologue_scratch_regno (void);
2451 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2453 enum ix86_function_specific_strings
2455 IX86_FUNCTION_SPECIFIC_ARCH,
2456 IX86_FUNCTION_SPECIFIC_TUNE,
2457 IX86_FUNCTION_SPECIFIC_MAX
2460 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2461 const char *, enum fpmath_unit, bool);
2462 static void ix86_function_specific_save (struct cl_target_option *,
2463 struct gcc_options *opts);
2464 static void ix86_function_specific_restore (struct gcc_options *opts,
2465 struct cl_target_option *);
2466 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2467 static void ix86_function_specific_print (FILE *, int,
2468 struct cl_target_option *);
2469 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2470 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2471 struct gcc_options *,
2472 struct gcc_options *,
2473 struct gcc_options *);
2474 static bool ix86_can_inline_p (tree, tree);
2475 static void ix86_set_current_function (tree);
2476 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2478 static enum calling_abi ix86_function_abi (const_tree);
2481 #ifndef SUBTARGET32_DEFAULT_CPU
2482 #define SUBTARGET32_DEFAULT_CPU "i386"
2483 #endif
2485 /* Whether -mtune= or -march= were specified */
2486 static int ix86_tune_defaulted;
2487 static int ix86_arch_specified;
2489 /* Vectorization library interface and handlers. */
2490 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2492 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2493 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2495 /* Processor target table, indexed by processor number */
2496 struct ptt
2498 const char *const name; /* processor name */
2499 const struct processor_costs *cost; /* Processor costs */
2500 const int align_loop; /* Default alignments. */
2501 const int align_loop_max_skip;
2502 const int align_jump;
2503 const int align_jump_max_skip;
2504 const int align_func;
2507 /* This table must be in sync with enum processor_type in i386.h. */
2508 static const struct ptt processor_target_table[PROCESSOR_max] =
2510 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2511 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2512 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2513 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2514 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2515 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2516 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2517 {"core2", &core_cost, 16, 10, 16, 10, 16},
2518 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2519 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2520 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2521 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2522 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2523 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2524 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2525 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2526 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2527 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2528 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2529 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2530 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2531 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2532 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2533 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2534 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2535 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2538 static unsigned int
2539 rest_of_handle_insert_vzeroupper (void)
2541 int i;
2543 /* vzeroupper instructions are inserted immediately after reload to
2544 account for possible spills from 256-bit registers. The pass reuses the
2545 mode-switching infrastructure by re-running the mode insertion pass, so
2546 disable the entities that have already been processed. */
2547 for (i = 0; i < MAX_386_ENTITIES; i++)
2548 ix86_optimize_mode_switching[i] = 0;
2550 ix86_optimize_mode_switching[AVX_U128] = 1;
2552 /* Call optimize_mode_switching. */
2553 g->get_passes ()->execute_pass_mode_switching ();
2554 return 0;
2557 namespace {
2559 const pass_data pass_data_insert_vzeroupper =
2561 RTL_PASS, /* type */
2562 "vzeroupper", /* name */
2563 OPTGROUP_NONE, /* optinfo_flags */
2564 TV_NONE, /* tv_id */
2565 0, /* properties_required */
2566 0, /* properties_provided */
2567 0, /* properties_destroyed */
2568 0, /* todo_flags_start */
2569 TODO_df_finish, /* todo_flags_finish */
2572 class pass_insert_vzeroupper : public rtl_opt_pass
2574 public:
2575 pass_insert_vzeroupper(gcc::context *ctxt)
2576 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2579 /* opt_pass methods: */
2580 virtual bool gate (function *)
2582 return TARGET_AVX && !TARGET_AVX512F
2583 && TARGET_VZEROUPPER && flag_expensive_optimizations
2584 && !optimize_size;
2587 virtual unsigned int execute (function *)
2589 return rest_of_handle_insert_vzeroupper ();
2592 }; // class pass_insert_vzeroupper
2594 } // anon namespace
2596 rtl_opt_pass *
2597 make_pass_insert_vzeroupper (gcc::context *ctxt)
2599 return new pass_insert_vzeroupper (ctxt);
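/* The factory above only constructs the pass object; it is assumed to be
   registered with the pass manager elsewhere in the i386 option handling so
   that it runs after reload. The gate above then restricts it to AVX (but
   not AVX-512F) targets built with -mvzeroupper, expensive optimizations
   enabled and not optimizing for size.  */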
2602 /* Return true if a red-zone is in use. */
2604 static inline bool
2605 ix86_using_red_zone (void)
2607 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2610 /* Return a string that documents the current -m options. The caller is
2611 responsible for freeing the string. */
2613 static char *
2614 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2615 const char *tune, enum fpmath_unit fpmath,
2616 bool add_nl_p)
2618 struct ix86_target_opts
2620 const char *option; /* option string */
2621 HOST_WIDE_INT mask; /* isa mask options */
2624 /* This table is ordered so that options like -msse4.2 that imply
2625 preceding options will match those first. */
2626 static struct ix86_target_opts isa_opts[] =
2628 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2629 { "-mfma", OPTION_MASK_ISA_FMA },
2630 { "-mxop", OPTION_MASK_ISA_XOP },
2631 { "-mlwp", OPTION_MASK_ISA_LWP },
2632 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2633 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2634 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2635 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2636 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2637 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2638 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2639 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2640 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2641 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2642 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2643 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2644 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2645 { "-msse3", OPTION_MASK_ISA_SSE3 },
2646 { "-msse2", OPTION_MASK_ISA_SSE2 },
2647 { "-msse", OPTION_MASK_ISA_SSE },
2648 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2649 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2650 { "-mmmx", OPTION_MASK_ISA_MMX },
2651 { "-mabm", OPTION_MASK_ISA_ABM },
2652 { "-mbmi", OPTION_MASK_ISA_BMI },
2653 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2654 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2655 { "-mhle", OPTION_MASK_ISA_HLE },
2656 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2657 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2658 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2659 { "-madx", OPTION_MASK_ISA_ADX },
2660 { "-mtbm", OPTION_MASK_ISA_TBM },
2661 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2662 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2663 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2664 { "-maes", OPTION_MASK_ISA_AES },
2665 { "-msha", OPTION_MASK_ISA_SHA },
2666 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2667 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2668 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2669 { "-mf16c", OPTION_MASK_ISA_F16C },
2670 { "-mrtm", OPTION_MASK_ISA_RTM },
2671 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2672 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2673 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2674 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2675 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2676 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2677 { "-mmpx", OPTION_MASK_ISA_MPX },
2678 { "-mclwb", OPTION_MASK_ISA_CLWB },
2679 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
2682 /* Flag options. */
2683 static struct ix86_target_opts flag_opts[] =
2685 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2686 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2687 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2688 { "-m80387", MASK_80387 },
2689 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2690 { "-malign-double", MASK_ALIGN_DOUBLE },
2691 { "-mcld", MASK_CLD },
2692 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2693 { "-mieee-fp", MASK_IEEE_FP },
2694 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2695 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2696 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2697 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2698 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2699 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2700 { "-mno-red-zone", MASK_NO_RED_ZONE },
2701 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2702 { "-mrecip", MASK_RECIP },
2703 { "-mrtd", MASK_RTD },
2704 { "-msseregparm", MASK_SSEREGPARM },
2705 { "-mstack-arg-probe", MASK_STACK_PROBE },
2706 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2707 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2708 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2709 { "-mvzeroupper", MASK_VZEROUPPER },
2710 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2711 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2712 { "-mprefer-avx128", MASK_PREFER_AVX128},
2715 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2717 char isa_other[40];
2718 char target_other[40];
2719 unsigned num = 0;
2720 unsigned i, j;
2721 char *ret;
2722 char *ptr;
2723 size_t len;
2724 size_t line_len;
2725 size_t sep_len;
2726 const char *abi;
2728 memset (opts, '\0', sizeof (opts));
2730 /* Add -march= option. */
2731 if (arch)
2733 opts[num][0] = "-march=";
2734 opts[num++][1] = arch;
2737 /* Add -mtune= option. */
2738 if (tune)
2740 opts[num][0] = "-mtune=";
2741 opts[num++][1] = tune;
2744 /* Add -m32/-m64/-mx32. */
2745 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2747 if ((isa & OPTION_MASK_ABI_64) != 0)
2748 abi = "-m64";
2749 else
2750 abi = "-mx32";
2751 isa &= ~ (OPTION_MASK_ISA_64BIT
2752 | OPTION_MASK_ABI_64
2753 | OPTION_MASK_ABI_X32);
2755 else
2756 abi = "-m32";
2757 opts[num++][0] = abi;
2759 /* Pick out the options in isa options. */
2760 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2762 if ((isa & isa_opts[i].mask) != 0)
2764 opts[num++][0] = isa_opts[i].option;
2765 isa &= ~ isa_opts[i].mask;
2769 if (isa && add_nl_p)
2771 opts[num++][0] = isa_other;
2772 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2773 isa);
2776 /* Add flag options. */
2777 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2779 if ((flags & flag_opts[i].mask) != 0)
2781 opts[num++][0] = flag_opts[i].option;
2782 flags &= ~ flag_opts[i].mask;
2786 if (flags && add_nl_p)
2788 opts[num++][0] = target_other;
2789 sprintf (target_other, "(other flags: %#x)", flags);
2792 /* Add -fpmath= option. */
2793 if (fpmath)
2795 opts[num][0] = "-mfpmath=";
2796 switch ((int) fpmath)
2798 case FPMATH_387:
2799 opts[num++][1] = "387";
2800 break;
2802 case FPMATH_SSE:
2803 opts[num++][1] = "sse";
2804 break;
2806 case FPMATH_387 | FPMATH_SSE:
2807 opts[num++][1] = "sse+387";
2808 break;
2810 default:
2811 gcc_unreachable ();
2815 /* Any options? */
2816 if (num == 0)
2817 return NULL;
2819 gcc_assert (num < ARRAY_SIZE (opts));
2821 /* Size the string. */
2822 len = 0;
2823 sep_len = (add_nl_p) ? 3 : 1;
2824 for (i = 0; i < num; i++)
2826 len += sep_len;
2827 for (j = 0; j < 2; j++)
2828 if (opts[i][j])
2829 len += strlen (opts[i][j]);
2832 /* Build the string. */
2833 ret = ptr = (char *) xmalloc (len);
2834 line_len = 0;
2836 for (i = 0; i < num; i++)
2838 size_t len2[2];
2840 for (j = 0; j < 2; j++)
2841 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2843 if (i != 0)
2845 *ptr++ = ' ';
2846 line_len++;
2848 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2850 *ptr++ = '\\';
2851 *ptr++ = '\n';
2852 line_len = 0;
2856 for (j = 0; j < 2; j++)
2857 if (opts[i][j])
2859 memcpy (ptr, opts[i][j], len2[j]);
2860 ptr += len2[j];
2861 line_len += len2[j];
2865 *ptr = '\0';
2866 gcc_assert (ret + len >= ptr);
2868 return ret;
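/* A sketch of the kind of string the routine above produces (the exact
   contents depend on the enabled ISA bits and flags; the -march/-mtune values
   here are placeholders):

     "-m64 -march=foo -mtune=bar -msse2 -msse -mmmx -mfpmath=sse"

   Options are space-separated; when ADD_NL_P is true a backslash-newline pair
   is emitted once a line would exceed roughly 70 characters, and unnamed bits
   are reported as "(other isa: ...)" or "(other flags: ...)".  */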
2871 /* Return true if profiling code should be emitted before the prologue;
2872 otherwise return false. On x86 this is the case when -mfentry
2873 ("hotfix"-style profiling) is in effect. */
2874 static bool
2875 ix86_profile_before_prologue (void)
2877 return flag_fentry != 0;
2880 /* Function that is callable from the debugger to print the current
2881 options. */
2882 void ATTRIBUTE_UNUSED
2883 ix86_debug_options (void)
2885 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2886 ix86_arch_string, ix86_tune_string,
2887 ix86_fpmath, true);
2889 if (opts)
2891 fprintf (stderr, "%s\n\n", opts);
2892 free (opts);
2894 else
2895 fputs ("<no options>\n\n", stderr);
2897 return;
2900 static const char *stringop_alg_names[] = {
2901 #define DEF_ENUM
2902 #define DEF_ALG(alg, name) #name,
2903 #include "stringop.def"
2904 #undef DEF_ENUM
2905 #undef DEF_ALG
2908 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2909 The string has the following form (or is a comma-separated list of such entries):
2911 strategy_alg:max_size:[align|noalign]
2913 where the full size range for the strategy is either [0, max_size] or
2914 [min_size, max_size], in which min_size is the max_size + 1 of the
2915 preceding range. The last size range must have max_size == -1.
2917 Examples:
2920 -mmemcpy-strategy=libcall:-1:noalign
2922 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2926 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2928 This tells the compiler to use the following strategy for memset:
2929 1) when the expected size is between [1, 16], use rep_8byte strategy;
2930 2) when the size is between [17, 2048], use vector_loop;
2931 3) when the size is > 2048, use libcall. */
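/* Tracing the memset example above through the parser below, the resulting
   stringop_algs size entries would be (a sketch, assuming the string is
   accepted):

     { 16,   rep_8byte,    true  }   -- "noalign"
     { 2048, vector_loop,  false }   -- "align"
     { -1,   libcall,      true  }   -- "noalign"

   i.e. "align"/"noalign" in the option string maps to NOALIGN = false/true in
   the per-range records.  */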
2933 struct stringop_size_range
2935 int max;
2936 stringop_alg alg;
2937 bool noalign;
2940 static void
2941 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2943 const struct stringop_algs *default_algs;
2944 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2945 char *curr_range_str, *next_range_str;
2946 int i = 0, n = 0;
2948 if (is_memset)
2949 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2950 else
2951 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2953 curr_range_str = strategy_str;
2957 int maxs;
2958 char alg_name[128];
2959 char align[16];
2960 next_range_str = strchr (curr_range_str, ',');
2961 if (next_range_str)
2962 *next_range_str++ = '\0';
2964 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2965 alg_name, &maxs, align))
2967 error ("wrong arg %s to option %s", curr_range_str,
2968 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2969 return;
2972 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2974 error ("size ranges of option %s should be increasing",
2975 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2976 return;
2979 for (i = 0; i < last_alg; i++)
2980 if (!strcmp (alg_name, stringop_alg_names[i]))
2981 break;
2983 if (i == last_alg)
2985 error ("wrong stringop strategy name %s specified for option %s",
2986 alg_name,
2987 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2988 return;
2991 input_ranges[n].max = maxs;
2992 input_ranges[n].alg = (stringop_alg) i;
2993 if (!strcmp (align, "align"))
2994 input_ranges[n].noalign = false;
2995 else if (!strcmp (align, "noalign"))
2996 input_ranges[n].noalign = true;
2997 else
2999 error ("unknown alignment %s specified for option %s",
3000 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3001 return;
3003 n++;
3004 curr_range_str = next_range_str;
3006 while (curr_range_str);
3008 if (input_ranges[n - 1].max != -1)
3010 error ("the max value for the last size range should be -1"
3011 " for option %s",
3012 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3013 return;
3016 if (n > MAX_STRINGOP_ALGS)
3018 error ("too many size ranges specified in option %s",
3019 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3020 return;
3023 /* Now override the default algs array. */
3024 for (i = 0; i < n; i++)
3026 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3027 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3028 = input_ranges[i].alg;
3029 *const_cast<int *>(&default_algs->size[i].noalign)
3030 = input_ranges[i].noalign;
3035 /* Parse the -mtune-ctrl= option.  When DUMP is true,
3036    print the features that are explicitly set.  */
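/* The string is a comma-separated list of tuning feature names from
   ix86_tune_feature_names; a leading '^' clears the named feature instead of
   setting it.  Illustrative example (feature names assumed):
   -mtune-ctrl=use_sahf,^use_incdec.  */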
3038 static void
3039 parse_mtune_ctrl_str (bool dump)
3041 if (!ix86_tune_ctrl_string)
3042 return;
3044 char *next_feature_string = NULL;
3045 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3046 char *orig = curr_feature_string;
3047 int i;
3050 bool clear = false;
3052 next_feature_string = strchr (curr_feature_string, ',');
3053 if (next_feature_string)
3054 *next_feature_string++ = '\0';
3055 if (*curr_feature_string == '^')
3057 curr_feature_string++;
3058 clear = true;
3060 for (i = 0; i < X86_TUNE_LAST; i++)
3062 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3064 ix86_tune_features[i] = !clear;
3065 if (dump)
3066 fprintf (stderr, "Explicitly %s feature %s\n",
3067 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3068 break;
3071 if (i == X86_TUNE_LAST)
3072 error ("Unknown parameter to option -mtune-ctrl: %s",
3073 clear ? curr_feature_string - 1 : curr_feature_string);
3074 curr_feature_string = next_feature_string;
3076 while (curr_feature_string);
3077 free (orig);
3080 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3081 processor type. */
3083 static void
3084 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3086 unsigned int ix86_tune_mask = 1u << ix86_tune;
3087 int i;
3089 for (i = 0; i < X86_TUNE_LAST; ++i)
3091 if (ix86_tune_no_default)
3092 ix86_tune_features[i] = 0;
3093 else
3094 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3097 if (dump)
3099 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3100 for (i = 0; i < X86_TUNE_LAST; i++)
3101 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3102 ix86_tune_features[i] ? "on" : "off");
3105 parse_mtune_ctrl_str (dump);
3109 /* Default align_* from the processor table. */
3111 static void
3112 ix86_default_align (struct gcc_options *opts)
3114 if (opts->x_align_loops == 0)
3116 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3117 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3119 if (opts->x_align_jumps == 0)
3121 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3122 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3124 if (opts->x_align_functions == 0)
3126 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3130 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
3132 static void
3133 ix86_override_options_after_change (void)
3135 ix86_default_align (&global_options);
3138 /* Override various settings based on options. If MAIN_ARGS_P, the
3139 options are from the command line, otherwise they are from
3140 attributes. */
3142 static void
3143 ix86_option_override_internal (bool main_args_p,
3144 struct gcc_options *opts,
3145 struct gcc_options *opts_set)
3147 int i;
3148 unsigned int ix86_arch_mask;
3149 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3150 const char *prefix;
3151 const char *suffix;
3152 const char *sw;
3154 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3155 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3156 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3157 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3158 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3159 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3160 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3161 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3162 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3163 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3164 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3165 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3166 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3167 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3168 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3169 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3170 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3171 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3172 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3173 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3174 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3175 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3176 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3177 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3178 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3179 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3180 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3181 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3182 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3183 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3184 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3185 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3186 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3187 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3188 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3189 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3190 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3191 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3192 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3193 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3194 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3195 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3196 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3197 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3198 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3199 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3200 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3201 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3202 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3203 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3204 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3205 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3206 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3207 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3208 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3209 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3210 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3212 #define PTA_CORE2 \
3213 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3214 | PTA_CX16 | PTA_FXSR)
3215 #define PTA_NEHALEM \
3216 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3217 #define PTA_WESTMERE \
3218 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3219 #define PTA_SANDYBRIDGE \
3220 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3221 #define PTA_IVYBRIDGE \
3222 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3223 #define PTA_HASWELL \
3224 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3225 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3226 #define PTA_BROADWELL \
3227 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3228 #define PTA_KNL \
3229 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3230 #define PTA_BONNELL \
3231 (PTA_CORE2 | PTA_MOVBE)
3232 #define PTA_SILVERMONT \
3233 (PTA_WESTMERE | PTA_MOVBE)
3235 /* If this reaches 64, we need to widen the struct pta flags field below.  */
3237 static struct pta
3239 const char *const name; /* processor name or nickname. */
3240 const enum processor_type processor;
3241 const enum attr_cpu schedule;
3242 const unsigned HOST_WIDE_INT flags;
3244 const processor_alias_table[] =
3246 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3247 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3248 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3249 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3250 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3251 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3252 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3253 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3254 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3255 PTA_MMX | PTA_SSE | PTA_FXSR},
3256 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3257 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3258 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3259 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3260 PTA_MMX | PTA_SSE | PTA_FXSR},
3261 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3262 PTA_MMX | PTA_SSE | PTA_FXSR},
3263 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3264 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3265 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3266 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3267 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3268 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3269 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3270 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3271 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3272 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3273 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3274 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3275 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3276 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3277 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3278 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3279 PTA_SANDYBRIDGE},
3280 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3281 PTA_SANDYBRIDGE},
3282 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3283 PTA_IVYBRIDGE},
3284 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3285 PTA_IVYBRIDGE},
3286 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3287 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3288 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3289 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3290 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3291 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3292 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3293 {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL},
3294 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3295 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3296 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3297 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3298 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3299 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3300 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3301 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3302 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3303 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3304 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3305 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3306 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3307 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3308 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3309 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3310 {"x86-64", PROCESSOR_K8, CPU_K8,
3311 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3312 {"k8", PROCESSOR_K8, CPU_K8,
3313 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3314 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3315 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3316 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3317 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3318 {"opteron", PROCESSOR_K8, CPU_K8,
3319 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3320 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3321 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3322 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3323 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3324 {"athlon64", PROCESSOR_K8, CPU_K8,
3325 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3326 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3327 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3328 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3329 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3330 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3331 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3332 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3333 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3334 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3335 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3336 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3337 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3338 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3339 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3340 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3341 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3342 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3343 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3344 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3345 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3346 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3347 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3348 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3349 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3350 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3351 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3352 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3353 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3354 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3355 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3356 | PTA_XSAVEOPT | PTA_FSGSBASE},
3357 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3358 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3359 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3360 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3361 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3362 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3363 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3364 | PTA_MOVBE},
3365 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3366 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3367 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3368 | PTA_FXSR | PTA_XSAVE},
3369 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3370 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3371 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3372 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3373 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3374 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3376 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3377 PTA_64BIT
3378 | PTA_HLE /* flags are only used for -march switch. */ },
3381 /* -mrecip options. */
3382 static struct
3384 const char *string; /* option name */
3385 unsigned int mask; /* mask bits to set */
3387 const recip_options[] =
3389 { "all", RECIP_MASK_ALL },
3390 { "none", RECIP_MASK_NONE },
3391 { "div", RECIP_MASK_DIV },
3392 { "sqrt", RECIP_MASK_SQRT },
3393 { "vec-div", RECIP_MASK_VEC_DIV },
3394 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3397 int const pta_size = ARRAY_SIZE (processor_alias_table);
3399 /* Set up prefix/suffix so the error messages refer to either the command
3400 line argument, or the attribute(target). */
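/* For example, a directive written as "%stune=k8%s" in a diagnostic below
   expands to "-mtune=k8" for command-line options and to
   "option(\"tune=k8\")" for attribute(target) uses.  */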
3401 if (main_args_p)
3403 prefix = "-m";
3404 suffix = "";
3405 sw = "switch";
3407 else
3409 prefix = "option(\"";
3410 suffix = "\")";
3411 sw = "attribute";
3414 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3415 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3416 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3417 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3418 #ifdef TARGET_BI_ARCH
3419 else
3421 #if TARGET_BI_ARCH == 1
3422 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3423 is on and OPTION_MASK_ABI_X32 is off. We turn off
3424 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3425 -mx32. */
3426 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3427 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3428 #else
3429 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3430 on and OPTION_MASK_ABI_64 is off. We turn off
3431 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3432 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3433 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3434 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3435 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3436 #endif
3438 #endif
3440 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3442 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3443 OPTION_MASK_ABI_64 for TARGET_X32. */
3444 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3445 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3447 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3448 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3449 | OPTION_MASK_ABI_X32
3450 | OPTION_MASK_ABI_64);
3451 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3453 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3454 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3455 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3456 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3459 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3460 SUBTARGET_OVERRIDE_OPTIONS;
3461 #endif
3463 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3464 SUBSUBTARGET_OVERRIDE_OPTIONS;
3465 #endif
3467 /* -fPIC is the default for x86_64. */
3468 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3469 opts->x_flag_pic = 2;
3471 /* Need to check -mtune=generic first. */
3472 if (opts->x_ix86_tune_string)
3474 /* As special support for cross compilers we read -mtune=native
3475 as -mtune=generic. With native compilers we won't see the
3476 -mtune=native, as it was changed by the driver. */
3477 if (!strcmp (opts->x_ix86_tune_string, "native"))
3479 opts->x_ix86_tune_string = "generic";
3481 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3482 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3483 "%stune=k8%s or %stune=generic%s instead as appropriate",
3484 prefix, suffix, prefix, suffix, prefix, suffix);
3486 else
3488 if (opts->x_ix86_arch_string)
3489 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3490 if (!opts->x_ix86_tune_string)
3492 opts->x_ix86_tune_string
3493 = processor_target_table[TARGET_CPU_DEFAULT].name;
3494 ix86_tune_defaulted = 1;
3497 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3498 or defaulted. We need to use a sensible tune option. */
3499 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3501 opts->x_ix86_tune_string = "generic";
3505 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3506 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3508 /* rep; movq isn't available in 32-bit code. */
3509 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3510 opts->x_ix86_stringop_alg = no_stringop;
3513 if (!opts->x_ix86_arch_string)
3514 opts->x_ix86_arch_string
3515 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3516 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3517 else
3518 ix86_arch_specified = 1;
3520 if (opts_set->x_ix86_pmode)
3522 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3523 && opts->x_ix86_pmode == PMODE_SI)
3524 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3525 && opts->x_ix86_pmode == PMODE_DI))
3526 error ("address mode %qs not supported in the %s bit mode",
3527 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3528 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3530 else
3531 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3532 ? PMODE_DI : PMODE_SI;
3534 if (!opts_set->x_ix86_abi)
3535 opts->x_ix86_abi = DEFAULT_ABI;
3537 /* For targets using the MS ABI enable ms-extensions, if not
3538    explicitly turned off.  For non-MS ABI we turn off this
3539    option.  */
3540 if (!opts_set->x_flag_ms_extensions)
3541 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3543 if (opts_set->x_ix86_cmodel)
3545 switch (opts->x_ix86_cmodel)
3547 case CM_SMALL:
3548 case CM_SMALL_PIC:
3549 if (opts->x_flag_pic)
3550 opts->x_ix86_cmodel = CM_SMALL_PIC;
3551 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3552 error ("code model %qs not supported in the %s bit mode",
3553 "small", "32");
3554 break;
3556 case CM_MEDIUM:
3557 case CM_MEDIUM_PIC:
3558 if (opts->x_flag_pic)
3559 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3560 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3561 error ("code model %qs not supported in the %s bit mode",
3562 "medium", "32");
3563 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3564 error ("code model %qs not supported in x32 mode",
3565 "medium");
3566 break;
3568 case CM_LARGE:
3569 case CM_LARGE_PIC:
3570 if (opts->x_flag_pic)
3571 opts->x_ix86_cmodel = CM_LARGE_PIC;
3572 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3573 error ("code model %qs not supported in the %s bit mode",
3574 "large", "32");
3575 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3576 error ("code model %qs not supported in x32 mode",
3577 "large");
3578 break;
3580 case CM_32:
3581 if (opts->x_flag_pic)
3582 error ("code model %s does not support PIC mode", "32");
3583 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3584 error ("code model %qs not supported in the %s bit mode",
3585 "32", "64");
3586 break;
3588 case CM_KERNEL:
3589 if (opts->x_flag_pic)
3591 error ("code model %s does not support PIC mode", "kernel");
3592 opts->x_ix86_cmodel = CM_32;
3594 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3595 error ("code model %qs not supported in the %s bit mode",
3596 "kernel", "32");
3597 break;
3599 default:
3600 gcc_unreachable ();
3603 else
3605 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3606 use of rip-relative addressing. This eliminates fixups that
3607 would otherwise be needed if this object is to be placed in a
3608 DLL, and is essentially just as efficient as direct addressing. */
3609 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3610 && (TARGET_RDOS || TARGET_PECOFF))
3611 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3612 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3613 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3614 else
3615 opts->x_ix86_cmodel = CM_32;
3617 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3619 error ("-masm=intel not supported in this configuration");
3620 opts->x_ix86_asm_dialect = ASM_ATT;
3622 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3623 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3624 sorry ("%i-bit mode not compiled in",
3625 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3627 for (i = 0; i < pta_size; i++)
3628 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3630 ix86_schedule = processor_alias_table[i].schedule;
3631 ix86_arch = processor_alias_table[i].processor;
3632 /* Default cpu tuning to the architecture. */
3633 ix86_tune = ix86_arch;
3635 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3636 && !(processor_alias_table[i].flags & PTA_64BIT))
3637 error ("CPU you selected does not support x86-64 "
3638 "instruction set");
3640 if (processor_alias_table[i].flags & PTA_MMX
3641 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3642 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3643 if (processor_alias_table[i].flags & PTA_3DNOW
3644 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3645 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3646 if (processor_alias_table[i].flags & PTA_3DNOW_A
3647 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3648 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3649 if (processor_alias_table[i].flags & PTA_SSE
3650 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3651 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3652 if (processor_alias_table[i].flags & PTA_SSE2
3653 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3654 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3655 if (processor_alias_table[i].flags & PTA_SSE3
3656 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3657 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3658 if (processor_alias_table[i].flags & PTA_SSSE3
3659 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3660 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3661 if (processor_alias_table[i].flags & PTA_SSE4_1
3662 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3663 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3664 if (processor_alias_table[i].flags & PTA_SSE4_2
3665 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3666 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3667 if (processor_alias_table[i].flags & PTA_AVX
3668 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3669 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3670 if (processor_alias_table[i].flags & PTA_AVX2
3671 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3672 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3673 if (processor_alias_table[i].flags & PTA_FMA
3674 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3675 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3676 if (processor_alias_table[i].flags & PTA_SSE4A
3677 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3678 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3679 if (processor_alias_table[i].flags & PTA_FMA4
3680 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3681 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3682 if (processor_alias_table[i].flags & PTA_XOP
3683 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3684 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3685 if (processor_alias_table[i].flags & PTA_LWP
3686 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3687 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3688 if (processor_alias_table[i].flags & PTA_ABM
3689 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3690 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3691 if (processor_alias_table[i].flags & PTA_BMI
3692 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3693 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3694 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3695 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3696 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3697 if (processor_alias_table[i].flags & PTA_TBM
3698 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3699 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3700 if (processor_alias_table[i].flags & PTA_BMI2
3701 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3702 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3703 if (processor_alias_table[i].flags & PTA_CX16
3704 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3705 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3706 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3707 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3708 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3709 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3710 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3711 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3712 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3713 if (processor_alias_table[i].flags & PTA_MOVBE
3714 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3715 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3716 if (processor_alias_table[i].flags & PTA_AES
3717     && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3718   opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3719 if (processor_alias_table[i].flags & PTA_SHA
3720     && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3721   opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3722 if (processor_alias_table[i].flags & PTA_PCLMUL
3723 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3724 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3725 if (processor_alias_table[i].flags & PTA_FSGSBASE
3726 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3727 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3728 if (processor_alias_table[i].flags & PTA_RDRND
3729 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3730 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3731 if (processor_alias_table[i].flags & PTA_F16C
3732 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3733 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3734 if (processor_alias_table[i].flags & PTA_RTM
3735 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3736 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3737 if (processor_alias_table[i].flags & PTA_HLE
3738 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3739 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3740 if (processor_alias_table[i].flags & PTA_PRFCHW
3741 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3742 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3743 if (processor_alias_table[i].flags & PTA_RDSEED
3744 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3745 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3746 if (processor_alias_table[i].flags & PTA_ADX
3747 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3748 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3749 if (processor_alias_table[i].flags & PTA_FXSR
3750 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3751 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3752 if (processor_alias_table[i].flags & PTA_XSAVE
3753 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3754 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3755 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3756 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3757 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3758 if (processor_alias_table[i].flags & PTA_AVX512F
3759 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3760 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3761 if (processor_alias_table[i].flags & PTA_AVX512ER
3762 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3763 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3764 if (processor_alias_table[i].flags & PTA_AVX512PF
3765 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3766 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3767 if (processor_alias_table[i].flags & PTA_AVX512CD
3768 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3769 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3770 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3771 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3772 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3773 if (processor_alias_table[i].flags & PTA_PCOMMIT
3774 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3775 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3776 if (processor_alias_table[i].flags & PTA_CLWB
3777 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3778 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3779 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3780 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3781 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3782 if (processor_alias_table[i].flags & PTA_XSAVEC
3783 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3784 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3785 if (processor_alias_table[i].flags & PTA_XSAVES
3786 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3787 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3788 if (processor_alias_table[i].flags & PTA_AVX512DQ
3789 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3790 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3791 if (processor_alias_table[i].flags & PTA_AVX512BW
3792 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3793 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3794 if (processor_alias_table[i].flags & PTA_AVX512VL
3795 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3796 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3797 if (processor_alias_table[i].flags & PTA_MPX
3798 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3799 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3800 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3801 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3802 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3803 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3804 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3805 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3806 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3807 x86_prefetch_sse = true;
3809 break;
3812 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3813   error ("Intel MPX does not support x32");
3818 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3819 error ("generic CPU can be used only for %stune=%s %s",
3820 prefix, suffix, sw);
3821 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3822 error ("intel CPU can be used only for %stune=%s %s",
3823 prefix, suffix, sw);
3824 else if (i == pta_size)
3825 error ("bad value (%s) for %sarch=%s %s",
3826 opts->x_ix86_arch_string, prefix, suffix, sw);
3828 ix86_arch_mask = 1u << ix86_arch;
3829 for (i = 0; i < X86_ARCH_LAST; ++i)
3830 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3832 for (i = 0; i < pta_size; i++)
3833 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3835 ix86_schedule = processor_alias_table[i].schedule;
3836 ix86_tune = processor_alias_table[i].processor;
3837 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3839 if (!(processor_alias_table[i].flags & PTA_64BIT))
3841 if (ix86_tune_defaulted)
3843 opts->x_ix86_tune_string = "x86-64";
3844 for (i = 0; i < pta_size; i++)
3845 if (! strcmp (opts->x_ix86_tune_string,
3846 processor_alias_table[i].name))
3847 break;
3848 ix86_schedule = processor_alias_table[i].schedule;
3849 ix86_tune = processor_alias_table[i].processor;
3851 else
3852 error ("CPU you selected does not support x86-64 "
3853 "instruction set");
3856 /* Intel CPUs have always interpreted SSE prefetch instructions as
3857 NOPs; so, we can enable SSE prefetch instructions even when
3858 -mtune (rather than -march) points us to a processor that has them.
3859 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3860 higher processors. */
3861 if (TARGET_CMOV
3862 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3863 x86_prefetch_sse = true;
3864 break;
3867 if (ix86_tune_specified && i == pta_size)
3868 error ("bad value (%s) for %stune=%s %s",
3869 opts->x_ix86_tune_string, prefix, suffix, sw);
3871 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3873 #ifndef USE_IX86_FRAME_POINTER
3874 #define USE_IX86_FRAME_POINTER 0
3875 #endif
3877 #ifndef USE_X86_64_FRAME_POINTER
3878 #define USE_X86_64_FRAME_POINTER 0
3879 #endif
3881 /* Set the default values for switches whose default depends on TARGET_64BIT
3882 in case they weren't overwritten by command line options. */
3883 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3885 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3886 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3887 if (opts->x_flag_asynchronous_unwind_tables
3888 && !opts_set->x_flag_unwind_tables
3889 && TARGET_64BIT_MS_ABI)
3890 opts->x_flag_unwind_tables = 1;
3891 if (opts->x_flag_asynchronous_unwind_tables == 2)
3892 opts->x_flag_unwind_tables
3893 = opts->x_flag_asynchronous_unwind_tables = 1;
3894 if (opts->x_flag_pcc_struct_return == 2)
3895 opts->x_flag_pcc_struct_return = 0;
3897 else
3899 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3900 opts->x_flag_omit_frame_pointer
3901 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3902 if (opts->x_flag_asynchronous_unwind_tables == 2)
3903 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3904 if (opts->x_flag_pcc_struct_return == 2)
3905 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3908 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3909 /* TODO: ix86_cost should be chosen at instruction or function granularity
3910    so that for cold code we use size_cost even in !optimize_size compilation.  */
3911 if (opts->x_optimize_size)
3912 ix86_cost = &ix86_size_cost;
3913 else
3914 ix86_cost = ix86_tune_cost;
3916 /* Arrange to set up i386_stack_locals for all functions. */
3917 init_machine_status = ix86_init_machine_status;
3919 /* Validate -mregparm= value. */
3920 if (opts_set->x_ix86_regparm)
3922 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3923 warning (0, "-mregparm is ignored in 64-bit mode");
3924 if (opts->x_ix86_regparm > REGPARM_MAX)
3926 error ("-mregparm=%d is not between 0 and %d",
3927 opts->x_ix86_regparm, REGPARM_MAX);
3928 opts->x_ix86_regparm = 0;
3931 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3932 opts->x_ix86_regparm = REGPARM_MAX;
3934 /* Default align_* from the processor table. */
3935 ix86_default_align (opts);
3937 /* Provide default for -mbranch-cost= value. */
3938 if (!opts_set->x_ix86_branch_cost)
3939 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
3941 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3943 opts->x_target_flags
3944 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3946 /* Enable by default the SSE and MMX builtins. Do allow the user to
3947 explicitly disable any of these. In particular, disabling SSE and
3948 MMX for kernel code is extremely useful. */
3949 if (!ix86_arch_specified)
3950 opts->x_ix86_isa_flags
3951 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3952 | TARGET_SUBTARGET64_ISA_DEFAULT)
3953 & ~opts->x_ix86_isa_flags_explicit);
3955 if (TARGET_RTD_P (opts->x_target_flags))
3956 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3958 else
3960 opts->x_target_flags
3961 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3963 if (!ix86_arch_specified)
3964 opts->x_ix86_isa_flags
3965 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3967 /* The i386 ABI does not specify a red zone.  It still makes sense to use it
3968    when the programmer takes care to keep the stack from being destroyed.  */
3969 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3970 opts->x_target_flags |= MASK_NO_RED_ZONE;
3973 /* Keep nonleaf frame pointers. */
3974 if (opts->x_flag_omit_frame_pointer)
3975 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3976 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3977 opts->x_flag_omit_frame_pointer = 1;
3979 /* If we're doing fast math, we don't care about comparison order
3980 wrt NaNs. This lets us use a shorter comparison sequence. */
3981 if (opts->x_flag_finite_math_only)
3982 opts->x_target_flags &= ~MASK_IEEE_FP;
3984 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3985 since the insns won't need emulation. */
3986 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3987 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3989 /* Likewise, if the target doesn't have a 387, or we've specified
3990 software floating point, don't use 387 inline intrinsics. */
3991 if (!TARGET_80387_P (opts->x_target_flags))
3992 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3994 /* Turn on MMX builtins for -msse. */
3995 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3996 opts->x_ix86_isa_flags
3997 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3999 /* Enable SSE prefetch. */
4000 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
4001 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
4002 x86_prefetch_sse = true;
4004 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
4005 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
4006 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
4007 opts->x_ix86_isa_flags
4008 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
4010 /* Enable popcnt instruction for -msse4.2 or -mabm. */
4011 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
4012 || TARGET_ABM_P (opts->x_ix86_isa_flags))
4013 opts->x_ix86_isa_flags
4014 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4016 /* Enable lzcnt instruction for -mabm. */
4017 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
4018 opts->x_ix86_isa_flags
4019 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4021 /* Validate -mpreferred-stack-boundary= value or default it to
4022 PREFERRED_STACK_BOUNDARY_DEFAULT. */
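/* The argument is the log2 of the boundary in bytes: for example,
   -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT = 128 bits,
   i.e. 16-byte stack alignment.  */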
4023 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4024 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4026 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4027 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4028 int max = (TARGET_SEH ? 4 : 12);
4030 if (opts->x_ix86_preferred_stack_boundary_arg < min
4031 || opts->x_ix86_preferred_stack_boundary_arg > max)
4033 if (min == max)
4034 error ("-mpreferred-stack-boundary is not supported "
4035 "for this target");
4036 else
4037 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4038 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4040 else
4041 ix86_preferred_stack_boundary
4042 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4045 /* Set the default value for -mstackrealign. */
4046 if (opts->x_ix86_force_align_arg_pointer == -1)
4047 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4049 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4051 /* Validate -mincoming-stack-boundary= value or default it to
4052 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
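/* As above, the argument is the log2 of the boundary in bytes; the minimum of
   4 enforced in 64-bit mode below corresponds to the 16-byte alignment the
   64-bit psABI expects at call sites.  */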
4053 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4054 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4056 if (opts->x_ix86_incoming_stack_boundary_arg
4057 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
4058 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4059 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4060 opts->x_ix86_incoming_stack_boundary_arg,
4061 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
4062 else
4064 ix86_user_incoming_stack_boundary
4065 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4066 ix86_incoming_stack_boundary
4067 = ix86_user_incoming_stack_boundary;
4071 #ifndef NO_PROFILE_COUNTERS
4072 if (flag_nop_mcount)
4073 error ("-mnop-mcount is not compatible with this target");
4074 #endif
4075 if (flag_nop_mcount && flag_pic)
4076 error ("-mnop-mcount is not implemented for -fPIC");
4078 /* Accept -msseregparm only if at least SSE support is enabled. */
4079 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4080 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4081 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4083 if (opts_set->x_ix86_fpmath)
4085 if (opts->x_ix86_fpmath & FPMATH_SSE)
4087 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4089 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4090 opts->x_ix86_fpmath = FPMATH_387;
4092 else if ((opts->x_ix86_fpmath & FPMATH_387)
4093 && !TARGET_80387_P (opts->x_target_flags))
4095 warning (0, "387 instruction set disabled, using SSE arithmetics");
4096 opts->x_ix86_fpmath = FPMATH_SSE;
4100 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4101    fpmath=387.  The latter is nevertheless the default on many targets, since
4102    the extra 80-bit precision of temporaries is considered part of the ABI.
4103    Overwrite the default at least for -ffast-math.
4104    TODO: -mfpmath=both seems to produce similarly performing code with
4105    slightly smaller binaries.  It is however not clear whether register
4106    allocation is ready for this setting.
4107    Also, -mfpmath=387 is overall noticeably more compact (about 4-5%) than SSE
4108    codegen.  We may switch to 387 with -ffast-math for size-optimized
4109    functions.  */
4110 else if (fast_math_flags_set_p (&global_options)
4111 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4112 opts->x_ix86_fpmath = FPMATH_SSE;
4113 else
4114 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4116 /* If the i387 is disabled, then do not return values in it. */
4117 if (!TARGET_80387_P (opts->x_target_flags))
4118 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4120 /* Use external vectorized library in vectorizing intrinsics. */
4121 if (opts_set->x_ix86_veclibabi_type)
4122 switch (opts->x_ix86_veclibabi_type)
4124 case ix86_veclibabi_type_svml:
4125 ix86_veclib_handler = ix86_veclibabi_svml;
4126 break;
4128 case ix86_veclibabi_type_acml:
4129 ix86_veclib_handler = ix86_veclibabi_acml;
4130 break;
4132 default:
4133 gcc_unreachable ();
4136 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4137 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4138 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4140 /* If stack probes are required, the space used for large function
4141 arguments on the stack must also be probed, so enable
4142 -maccumulate-outgoing-args so this happens in the prologue. */
4143 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4144 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4146 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4147 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4148 "for correctness", prefix, suffix);
4149 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4152 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4154 char *p;
4155 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4156 p = strchr (internal_label_prefix, 'X');
4157 internal_label_prefix_len = p - internal_label_prefix;
4158 *p = '\0';
4161 /* When the scheduling description is not available, disable the scheduler
4162    passes so they won't slow down the compilation and make x87 code slower.  */
4163 if (!TARGET_SCHEDULE)
4164 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4166 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4167 ix86_tune_cost->simultaneous_prefetches,
4168 opts->x_param_values,
4169 opts_set->x_param_values);
4170 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4171 ix86_tune_cost->prefetch_block,
4172 opts->x_param_values,
4173 opts_set->x_param_values);
4174 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4175 ix86_tune_cost->l1_cache_size,
4176 opts->x_param_values,
4177 opts_set->x_param_values);
4178 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4179 ix86_tune_cost->l2_cache_size,
4180 opts->x_param_values,
4181 opts_set->x_param_values);
4183 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful.  */
4184 if (opts->x_flag_prefetch_loop_arrays < 0
4185 && HAVE_prefetch
4186 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4187 && !opts->x_optimize_size
4188 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4189 opts->x_flag_prefetch_loop_arrays = 1;
4191 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4192    can be optimized to ap = __builtin_next_arg (0).  */
4193 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4194 targetm.expand_builtin_va_start = NULL;
4196 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4198 ix86_gen_leave = gen_leave_rex64;
4199 if (Pmode == DImode)
4201 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4202 ix86_gen_tls_local_dynamic_base_64
4203 = gen_tls_local_dynamic_base_64_di;
4205 else
4207 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4208 ix86_gen_tls_local_dynamic_base_64
4209 = gen_tls_local_dynamic_base_64_si;
4212 else
4213 ix86_gen_leave = gen_leave;
4215 if (Pmode == DImode)
4217 ix86_gen_add3 = gen_adddi3;
4218 ix86_gen_sub3 = gen_subdi3;
4219 ix86_gen_sub3_carry = gen_subdi3_carry;
4220 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4221 ix86_gen_andsp = gen_anddi3;
4222 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4223 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4224 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4225 ix86_gen_monitor = gen_sse3_monitor_di;
4227 else
4229 ix86_gen_add3 = gen_addsi3;
4230 ix86_gen_sub3 = gen_subsi3;
4231 ix86_gen_sub3_carry = gen_subsi3_carry;
4232 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4233 ix86_gen_andsp = gen_andsi3;
4234 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4235 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4236 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4237 ix86_gen_monitor = gen_sse3_monitor_si;
4240 #ifdef USE_IX86_CLD
4241 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4242 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4243 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4244 #endif
4246 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4248 if (opts->x_flag_fentry > 0)
4249 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4250 "with -fpic");
4251 opts->x_flag_fentry = 0;
4253 else if (TARGET_SEH)
4255 if (opts->x_flag_fentry == 0)
4256 sorry ("-mno-fentry isn%'t compatible with SEH");
4257 opts->x_flag_fentry = 1;
4259 else if (opts->x_flag_fentry < 0)
4261 #if defined(PROFILE_BEFORE_PROLOGUE)
4262 opts->x_flag_fentry = 1;
4263 #else
4264 opts->x_flag_fentry = 0;
4265 #endif
4268 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4269 opts->x_target_flags |= MASK_VZEROUPPER;
4270 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4271 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4272 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4273 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4274 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4275 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4276 /* Enable 128-bit AVX instruction generation
4277 for the auto-vectorizer. */
4278 if (TARGET_AVX128_OPTIMAL
4279 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4280 opts->x_target_flags |= MASK_PREFER_AVX128;
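/* -mrecip= takes a comma-separated list of the option names from the
   recip_options table above, each optionally prefixed with '!' to clear the
   corresponding mask bits; "default" stands for RECIP_MASK_ALL.  Illustrative
   example: -mrecip=all,!sqrt enables every reciprocal approximation except
   the scalar square root.  */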
4282 if (opts->x_ix86_recip_name)
4284 char *p = ASTRDUP (opts->x_ix86_recip_name);
4285 char *q;
4286 unsigned int mask, i;
4287 bool invert;
4289 while ((q = strtok (p, ",")) != NULL)
4291 p = NULL;
4292 if (*q == '!')
4294 invert = true;
4295 q++;
4297 else
4298 invert = false;
4300 if (!strcmp (q, "default"))
4301 mask = RECIP_MASK_ALL;
4302 else
4304 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4305 if (!strcmp (q, recip_options[i].string))
4307 mask = recip_options[i].mask;
4308 break;
4311 if (i == ARRAY_SIZE (recip_options))
4313 error ("unknown option for -mrecip=%s", q);
4314 invert = false;
4315 mask = RECIP_MASK_NONE;
4319 opts->x_recip_mask_explicit |= mask;
4320 if (invert)
4321 opts->x_recip_mask &= ~mask;
4322 else
4323 opts->x_recip_mask |= mask;
4327 if (TARGET_RECIP_P (opts->x_target_flags))
4328 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4329 else if (opts_set->x_target_flags & MASK_RECIP)
4330 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4332 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4333 for 64-bit Bionic. */
4334 if (TARGET_HAS_BIONIC
4335 && !(opts_set->x_target_flags
4336 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4337 opts->x_target_flags |= (TARGET_64BIT
4338 ? MASK_LONG_DOUBLE_128
4339 : MASK_LONG_DOUBLE_64);
4341 /* Only one of them can be active. */
4342 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4343 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4345 /* Save the initial options in case the user does function specific
4346 options. */
4347 if (main_args_p)
4348 target_option_default_node = target_option_current_node
4349 = build_target_option_node (opts);
4351 /* Handle stack protector */
4352 if (!opts_set->x_ix86_stack_protector_guard)
4353 opts->x_ix86_stack_protector_guard
4354 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4356 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4357 if (opts->x_ix86_tune_memcpy_strategy)
4359 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4360 ix86_parse_stringop_strategy_string (str, false);
4361 free (str);
4364 if (opts->x_ix86_tune_memset_strategy)
4366 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4367 ix86_parse_stringop_strategy_string (str, true);
4368 free (str);
4372 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4374 static void
4375 ix86_option_override (void)
4377 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4378 struct register_pass_info insert_vzeroupper_info
4379 = { pass_insert_vzeroupper, "reload",
4380 1, PASS_POS_INSERT_AFTER
4383 ix86_option_override_internal (true, &global_options, &global_options_set);
4386 /* This needs to be done at start up. It's convenient to do it here. */
4387 register_pass (&insert_vzeroupper_info);
4390 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4391 static char *
4392 ix86_offload_options (void)
4394 if (TARGET_LP64)
4395 return xstrdup ("-foffload-abi=lp64");
4396 return xstrdup ("-foffload-abi=ilp32");
4399 /* Update register usage after having seen the compiler flags. */
4401 static void
4402 ix86_conditional_register_usage (void)
4404 int i, c_mask;
4406 /* For 32-bit targets, squash the REX registers. */
4407 if (! TARGET_64BIT)
4409 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4410 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4411 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4412 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4413 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4414 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4417 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4418 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4419 : TARGET_64BIT ? (1 << 2)
4420 : (1 << 1));
4422 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4424 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4426 /* Set/reset conditionally defined registers from
4427 CALL_USED_REGISTERS initializer. */
4428 if (call_used_regs[i] > 1)
4429 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4431 /* Calculate registers of CLOBBERED_REGS register set
4432 as call used registers from GENERAL_REGS register set. */
4433 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4434 && call_used_regs[i])
4435 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4438 /* If MMX is disabled, squash the registers. */
4439 if (! TARGET_MMX)
4440 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4441 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4442 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4444 /* If SSE is disabled, squash the registers. */
4445 if (! TARGET_SSE)
4446 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4447 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4448 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4450 /* If the FPU is disabled, squash the registers. */
4451 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4452 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4453 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4454 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4456 /* If AVX512F is disabled, squash the registers. */
4457 if (! TARGET_AVX512F)
4459 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4460 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4462 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4463 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4466 /* If MPX is disabled, squash the registers. */
4467 if (! TARGET_MPX)
4468 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4469 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4473 /* Save the current options */
4475 static void
4476 ix86_function_specific_save (struct cl_target_option *ptr,
4477 struct gcc_options *opts)
4479 ptr->arch = ix86_arch;
4480 ptr->schedule = ix86_schedule;
4481 ptr->prefetch_sse = x86_prefetch_sse;
4482 ptr->tune = ix86_tune;
4483 ptr->branch_cost = ix86_branch_cost;
4484 ptr->tune_defaulted = ix86_tune_defaulted;
4485 ptr->arch_specified = ix86_arch_specified;
4486 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4487 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4488 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4489 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4490 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4491 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4492 ptr->x_ix86_abi = opts->x_ix86_abi;
4493 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4494 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4495 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4496 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4497 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4498 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4499 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4500 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4501 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4502 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4503 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4504 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4505 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4506 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4507 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4508 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4509 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4510 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4511 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4512 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4514 /* The fields are char but the variables are not; make sure the
4515 values fit in the fields. */
4516 gcc_assert (ptr->arch == ix86_arch);
4517 gcc_assert (ptr->schedule == ix86_schedule);
4518 gcc_assert (ptr->tune == ix86_tune);
4519 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4522 /* Restore the current options */
4524 static void
4525 ix86_function_specific_restore (struct gcc_options *opts,
4526 struct cl_target_option *ptr)
4528 enum processor_type old_tune = ix86_tune;
4529 enum processor_type old_arch = ix86_arch;
4530 unsigned int ix86_arch_mask;
4531 int i;
4533 /* We don't change -fPIC. */
4534 opts->x_flag_pic = flag_pic;
4536 ix86_arch = (enum processor_type) ptr->arch;
4537 ix86_schedule = (enum attr_cpu) ptr->schedule;
4538 ix86_tune = (enum processor_type) ptr->tune;
4539 x86_prefetch_sse = ptr->prefetch_sse;
4540 opts->x_ix86_branch_cost = ptr->branch_cost;
4541 ix86_tune_defaulted = ptr->tune_defaulted;
4542 ix86_arch_specified = ptr->arch_specified;
4543 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4544 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4545 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4546 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4547 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4548 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4549 opts->x_ix86_abi = ptr->x_ix86_abi;
4550 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4551 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4552 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4553 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4554 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4555 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4556 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4557 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4558 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4559 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4560 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4561 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4562 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4563 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4564 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4565 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4566 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4567 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4568 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4569 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4570 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4571 /* TODO: ix86_cost should be chosen at instruction or function granularity,
4572 so that for cold code we use size_cost even in !optimize_size compilation. */
4573 if (opts->x_optimize_size)
4574 ix86_cost = &ix86_size_cost;
4575 else
4576 ix86_cost = ix86_tune_cost;
4578 /* Recreate the arch feature tests if the arch changed */
4579 if (old_arch != ix86_arch)
4581 ix86_arch_mask = 1u << ix86_arch;
4582 for (i = 0; i < X86_ARCH_LAST; ++i)
4583 ix86_arch_features[i]
4584 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4587 /* Recreate the tune optimization tests */
4588 if (old_tune != ix86_tune)
4589 set_ix86_tune_features (ix86_tune, false);
4592 /* Adjust target options after streaming them in. This is mainly about
4593 reconciling them with global options. */
4595 static void
4596 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
4598 /* flag_pic is a global option, but ix86_cmodel is a target-saved option
4599 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
4600 for PIC, or error out. */
4601 if (flag_pic)
4602 switch (ptr->x_ix86_cmodel)
4604 case CM_SMALL:
4605 ptr->x_ix86_cmodel = CM_SMALL_PIC;
4606 break;
4608 case CM_MEDIUM:
4609 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
4610 break;
4612 case CM_LARGE:
4613 ptr->x_ix86_cmodel = CM_LARGE_PIC;
4614 break;
4616 case CM_KERNEL:
4617 error ("code model %s does not support PIC mode", "kernel");
4618 break;
4620 default:
4621 break;
4623 else
4624 switch (ptr->x_ix86_cmodel)
4626 case CM_SMALL_PIC:
4627 ptr->x_ix86_cmodel = CM_SMALL;
4628 break;
4630 case CM_MEDIUM_PIC:
4631 ptr->x_ix86_cmodel = CM_MEDIUM;
4632 break;
4634 case CM_LARGE_PIC:
4635 ptr->x_ix86_cmodel = CM_LARGE;
4636 break;
4638 default:
4639 break;
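/* Worked example (a sketch): a unit compiled with -mcmodel=medium whose
   target options are streamed in for a link performed with -fpic has its
   saved CM_MEDIUM rewritten to CM_MEDIUM_PIC above; the opposite rewrite
   happens when the final link is not position independent.  */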
4643 /* Print the current options */
4645 static void
4646 ix86_function_specific_print (FILE *file, int indent,
4647 struct cl_target_option *ptr)
4649 char *target_string
4650 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4651 NULL, NULL, ptr->x_ix86_fpmath, false);
4653 gcc_assert (ptr->arch < PROCESSOR_max);
4654 fprintf (file, "%*sarch = %d (%s)\n",
4655 indent, "",
4656 ptr->arch, processor_target_table[ptr->arch].name);
4658 gcc_assert (ptr->tune < PROCESSOR_max);
4659 fprintf (file, "%*stune = %d (%s)\n",
4660 indent, "",
4661 ptr->tune, processor_target_table[ptr->tune].name);
4663 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4665 if (target_string)
4667 fprintf (file, "%*s%s\n", indent, "", target_string);
4668 free (target_string);
4673 /* Inner function to process the attribute((target(...))); take an argument and
4674 set the current options from the argument. If we have a list, recursively go
4675 over the list. */
4677 static bool
4678 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4679 struct gcc_options *opts,
4680 struct gcc_options *opts_set,
4681 struct gcc_options *enum_opts_set)
4683 char *next_optstr;
4684 bool ret = true;
4686 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4687 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4688 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4689 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4690 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4692 enum ix86_opt_type
4694 ix86_opt_unknown,
4695 ix86_opt_yes,
4696 ix86_opt_no,
4697 ix86_opt_str,
4698 ix86_opt_enum,
4699 ix86_opt_isa
4702 static const struct
4704 const char *string;
4705 size_t len;
4706 enum ix86_opt_type type;
4707 int opt;
4708 int mask;
4709 } attrs[] = {
4710 /* isa options */
4711 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4712 IX86_ATTR_ISA ("abm", OPT_mabm),
4713 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4714 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4715 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4716 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4717 IX86_ATTR_ISA ("aes", OPT_maes),
4718 IX86_ATTR_ISA ("sha", OPT_msha),
4719 IX86_ATTR_ISA ("avx", OPT_mavx),
4720 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4721 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4722 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4723 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4724 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4725 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4726 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4727 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4728 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4729 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4730 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4731 IX86_ATTR_ISA ("sse", OPT_msse),
4732 IX86_ATTR_ISA ("sse2", OPT_msse2),
4733 IX86_ATTR_ISA ("sse3", OPT_msse3),
4734 IX86_ATTR_ISA ("sse4", OPT_msse4),
4735 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4736 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4737 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4738 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4739 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4740 IX86_ATTR_ISA ("fma", OPT_mfma),
4741 IX86_ATTR_ISA ("xop", OPT_mxop),
4742 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4743 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4744 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4745 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4746 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4747 IX86_ATTR_ISA ("hle", OPT_mhle),
4748 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4749 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4750 IX86_ATTR_ISA ("adx", OPT_madx),
4751 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4752 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4753 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4754 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4755 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4756 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4757 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4758 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4759 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4760 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4761 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
4763 /* enum options */
4764 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4766 /* string options */
4767 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4768 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4770 /* flag options */
4771 IX86_ATTR_YES ("cld",
4772 OPT_mcld,
4773 MASK_CLD),
4775 IX86_ATTR_NO ("fancy-math-387",
4776 OPT_mfancy_math_387,
4777 MASK_NO_FANCY_MATH_387),
4779 IX86_ATTR_YES ("ieee-fp",
4780 OPT_mieee_fp,
4781 MASK_IEEE_FP),
4783 IX86_ATTR_YES ("inline-all-stringops",
4784 OPT_minline_all_stringops,
4785 MASK_INLINE_ALL_STRINGOPS),
4787 IX86_ATTR_YES ("inline-stringops-dynamically",
4788 OPT_minline_stringops_dynamically,
4789 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4791 IX86_ATTR_NO ("align-stringops",
4792 OPT_mno_align_stringops,
4793 MASK_NO_ALIGN_STRINGOPS),
4795 IX86_ATTR_YES ("recip",
4796 OPT_mrecip,
4797 MASK_RECIP),
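/* A few illustrative strings the parser below accepts (a sketch; the exact
   set of ISA names is given by the table above):

     __attribute__((target ("avx2,fma")))        two isa options at once
     __attribute__((target ("no-sse4.2")))       the "no-" prefix clears a flag
     __attribute__((target ("arch=core-avx2")))  string option, handled later
     __attribute__((target ("fpmath=sse")))      enum option  */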
4801 /* If this is a list, recurse to get the options. */
4802 if (TREE_CODE (args) == TREE_LIST)
4804 bool ret = true;
4806 for (; args; args = TREE_CHAIN (args))
4807 if (TREE_VALUE (args)
4808 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4809 p_strings, opts, opts_set,
4810 enum_opts_set))
4811 ret = false;
4813 return ret;
4816 else if (TREE_CODE (args) != STRING_CST)
4818 error ("attribute %<target%> argument not a string");
4819 return false;
4822 /* Handle multiple arguments separated by commas. */
4823 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4825 while (next_optstr && *next_optstr != '\0')
4827 char *p = next_optstr;
4828 char *orig_p = p;
4829 char *comma = strchr (next_optstr, ',');
4830 const char *opt_string;
4831 size_t len, opt_len;
4832 int opt;
4833 bool opt_set_p;
4834 char ch;
4835 unsigned i;
4836 enum ix86_opt_type type = ix86_opt_unknown;
4837 int mask = 0;
4839 if (comma)
4841 *comma = '\0';
4842 len = comma - next_optstr;
4843 next_optstr = comma + 1;
4845 else
4847 len = strlen (p);
4848 next_optstr = NULL;
4851 /* Recognize no-xxx. */
4852 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4854 opt_set_p = false;
4855 p += 3;
4856 len -= 3;
4858 else
4859 opt_set_p = true;
4861 /* Find the option. */
4862 ch = *p;
4863 opt = N_OPTS;
4864 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4866 type = attrs[i].type;
4867 opt_len = attrs[i].len;
4868 if (ch == attrs[i].string[0]
4869 && ((type != ix86_opt_str && type != ix86_opt_enum)
4870 ? len == opt_len
4871 : len > opt_len)
4872 && memcmp (p, attrs[i].string, opt_len) == 0)
4874 opt = attrs[i].opt;
4875 mask = attrs[i].mask;
4876 opt_string = attrs[i].string;
4877 break;
4881 /* Process the option. */
4882 if (opt == N_OPTS)
4884 error ("attribute(target(\"%s\")) is unknown", orig_p);
4885 ret = false;
4888 else if (type == ix86_opt_isa)
4890 struct cl_decoded_option decoded;
4892 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4893 ix86_handle_option (opts, opts_set,
4894 &decoded, input_location);
4897 else if (type == ix86_opt_yes || type == ix86_opt_no)
4899 if (type == ix86_opt_no)
4900 opt_set_p = !opt_set_p;
4902 if (opt_set_p)
4903 opts->x_target_flags |= mask;
4904 else
4905 opts->x_target_flags &= ~mask;
4908 else if (type == ix86_opt_str)
4910 if (p_strings[opt])
4912 error ("option(\"%s\") was already specified", opt_string);
4913 ret = false;
4915 else
4916 p_strings[opt] = xstrdup (p + opt_len);
4919 else if (type == ix86_opt_enum)
4921 bool arg_ok;
4922 int value;
4924 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4925 if (arg_ok)
4926 set_option (opts, enum_opts_set, opt, value,
4927 p + opt_len, DK_UNSPECIFIED, input_location,
4928 global_dc);
4929 else
4931 error ("attribute(target(\"%s\")) is unknown", orig_p);
4932 ret = false;
4936 else
4937 gcc_unreachable ();
4940 return ret;
4943 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4945 tree
4946 ix86_valid_target_attribute_tree (tree args,
4947 struct gcc_options *opts,
4948 struct gcc_options *opts_set)
4950 const char *orig_arch_string = opts->x_ix86_arch_string;
4951 const char *orig_tune_string = opts->x_ix86_tune_string;
4952 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4953 int orig_tune_defaulted = ix86_tune_defaulted;
4954 int orig_arch_specified = ix86_arch_specified;
4955 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4956 tree t = NULL_TREE;
4957 int i;
4958 struct cl_target_option *def
4959 = TREE_TARGET_OPTION (target_option_default_node);
4960 struct gcc_options enum_opts_set;
4962 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4964 /* Process each of the options on the chain. */
4965 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4966 opts_set, &enum_opts_set))
4967 return error_mark_node;
4969 /* If the changed options are different from the default, rerun
4970 ix86_option_override_internal, and then save the options away.
4971 The string options are attribute options, and will be undone
4972 when we copy the save structure. */
4973 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4974 || opts->x_target_flags != def->x_target_flags
4975 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4976 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4977 || enum_opts_set.x_ix86_fpmath)
4979 /* If we are using the default tune= or arch=, undo the string assigned,
4980 and use the default. */
4981 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4982 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4983 else if (!orig_arch_specified)
4984 opts->x_ix86_arch_string = NULL;
4986 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4987 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4988 else if (orig_tune_defaulted)
4989 opts->x_ix86_tune_string = NULL;
4991 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4992 if (enum_opts_set.x_ix86_fpmath)
4993 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4994 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4995 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4997 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4998 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5001 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
5002 ix86_option_override_internal (false, opts, opts_set);
5004 /* Add any builtin functions with the new isa if any. */
5005 ix86_add_new_builtins (opts->x_ix86_isa_flags);
5007 /* Save the current options unless we are validating options for
5008 #pragma. */
5009 t = build_target_option_node (opts);
5011 opts->x_ix86_arch_string = orig_arch_string;
5012 opts->x_ix86_tune_string = orig_tune_string;
5013 opts_set->x_ix86_fpmath = orig_fpmath_set;
5015 /* Free up memory allocated to hold the strings */
5016 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5017 free (option_strings[i]);
5020 return t;
5023 /* Hook to validate attribute((target("string"))). */
5025 static bool
5026 ix86_valid_target_attribute_p (tree fndecl,
5027 tree ARG_UNUSED (name),
5028 tree args,
5029 int ARG_UNUSED (flags))
5031 struct gcc_options func_options;
5032 tree new_target, new_optimize;
5033 bool ret = true;
5035 /* attribute((target("default"))) does nothing, beyond
5036 affecting multi-versioning. */
5037 if (TREE_VALUE (args)
5038 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5039 && TREE_CHAIN (args) == NULL_TREE
5040 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5041 return true;
5043 tree old_optimize = build_optimization_node (&global_options);
5045 /* Get the optimization options of the current function. */
5046 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5048 if (!func_optimize)
5049 func_optimize = old_optimize;
5051 /* Init func_options. */
5052 memset (&func_options, 0, sizeof (func_options));
5053 init_options_struct (&func_options, NULL);
5054 lang_hooks.init_options_struct (&func_options);
5056 cl_optimization_restore (&func_options,
5057 TREE_OPTIMIZATION (func_optimize));
5059 /* Initialize func_options to the default before its target options can
5060 be set. */
5061 cl_target_option_restore (&func_options,
5062 TREE_TARGET_OPTION (target_option_default_node));
5064 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5065 &global_options_set);
5067 new_optimize = build_optimization_node (&func_options);
5069 if (new_target == error_mark_node)
5070 ret = false;
5072 else if (fndecl && new_target)
5074 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5076 if (old_optimize != new_optimize)
5077 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5080 return ret;
5084 /* Hook to determine if one function can safely inline another. */
5086 static bool
5087 ix86_can_inline_p (tree caller, tree callee)
5089 bool ret = false;
5090 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5091 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5093 /* If callee has no option attributes, then it is ok to inline. */
5094 if (!callee_tree)
5095 ret = true;
5097 /* If caller has no option attributes, but callee does then it is not ok to
5098 inline. */
5099 else if (!caller_tree)
5100 ret = false;
5102 else
5104 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5105 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5107 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
5108 can inline an SSE2 function but an SSE2 function can't inline an SSE4
5109 function. */
5110 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5111 != callee_opts->x_ix86_isa_flags)
5112 ret = false;
5114 /* See if we have the same non-isa options. */
5115 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5116 ret = false;
5118 /* See if arch, tune, etc. are the same. */
5119 else if (caller_opts->arch != callee_opts->arch)
5120 ret = false;
5122 else if (caller_opts->tune != callee_opts->tune)
5123 ret = false;
5125 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5126 ret = false;
5128 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5129 ret = false;
5131 else
5132 ret = true;
5135 return ret;
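/* Example of the subset rule above (a sketch): a caller compiled with
   __attribute__((target ("avx2"))) may inline a callee restricted to
   "sse2", because the callee's ISA flags are a subset of the caller's;
   inlining in the opposite direction is rejected.  */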
5139 /* Remember the last target of ix86_set_current_function. */
5140 static GTY(()) tree ix86_previous_fndecl;
5142 /* Set targets globals to the default (or current #pragma GCC target
5143 if active). Invalidate ix86_previous_fndecl cache. */
5145 void
5146 ix86_reset_previous_fndecl (void)
5148 tree new_tree = target_option_current_node;
5149 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5150 if (TREE_TARGET_GLOBALS (new_tree))
5151 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5152 else if (new_tree == target_option_default_node)
5153 restore_target_globals (&default_target_globals);
5154 else
5155 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5156 ix86_previous_fndecl = NULL_TREE;
5159 /* Establish appropriate back-end context for processing the function
5160 FNDECL. The argument might be NULL to indicate processing at top
5161 level, outside of any function scope. */
5162 static void
5163 ix86_set_current_function (tree fndecl)
5165 /* Only change the context if the function changes. This hook is called
5166 several times in the course of compiling a function, and we don't want to
5167 slow things down too much or call target_reinit when it isn't safe. */
5168 if (fndecl == ix86_previous_fndecl)
5169 return;
5171 tree old_tree;
5172 if (ix86_previous_fndecl == NULL_TREE)
5173 old_tree = target_option_current_node;
5174 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5175 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5176 else
5177 old_tree = target_option_default_node;
5179 if (fndecl == NULL_TREE)
5181 if (old_tree != target_option_current_node)
5182 ix86_reset_previous_fndecl ();
5183 return;
5186 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5187 if (new_tree == NULL_TREE)
5188 new_tree = target_option_default_node;
5190 if (old_tree != new_tree)
5192 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5193 if (TREE_TARGET_GLOBALS (new_tree))
5194 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5195 else if (new_tree == target_option_default_node)
5196 restore_target_globals (&default_target_globals);
5197 else
5198 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5200 ix86_previous_fndecl = fndecl;
5204 /* Return true if this goes in large data/bss. */
5206 static bool
5207 ix86_in_large_data_p (tree exp)
5209 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5210 return false;
5212 /* Functions are never large data. */
5213 if (TREE_CODE (exp) == FUNCTION_DECL)
5214 return false;
5216 /* Automatic variables are never large data. */
5217 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5218 return false;
5220 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5222 const char *section = DECL_SECTION_NAME (exp);
5223 if (strcmp (section, ".ldata") == 0
5224 || strcmp (section, ".lbss") == 0)
5225 return true;
5226 return false;
5228 else
5230 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5232 /* If this is an incomplete type with size 0, then we can't put it
5233 in data because it might be too big when completed. Also,
5234 int_size_in_bytes returns -1 if the size can vary or is larger than
5235 an integer, in which case it is also safer to assume that it goes in
5236 large data. */
5237 if (size <= 0 || size > ix86_section_threshold)
5238 return true;
5241 return false;
5244 /* Switch to the appropriate section for output of DECL.
5245 DECL is either a `VAR_DECL' node or a constant of some sort.
5246 RELOC indicates whether forming the initial value of DECL requires
5247 link-time relocations. */
5249 ATTRIBUTE_UNUSED static section *
5250 x86_64_elf_select_section (tree decl, int reloc,
5251 unsigned HOST_WIDE_INT align)
5253 if (ix86_in_large_data_p (decl))
5255 const char *sname = NULL;
5256 unsigned int flags = SECTION_WRITE;
5257 switch (categorize_decl_for_section (decl, reloc))
5259 case SECCAT_DATA:
5260 sname = ".ldata";
5261 break;
5262 case SECCAT_DATA_REL:
5263 sname = ".ldata.rel";
5264 break;
5265 case SECCAT_DATA_REL_LOCAL:
5266 sname = ".ldata.rel.local";
5267 break;
5268 case SECCAT_DATA_REL_RO:
5269 sname = ".ldata.rel.ro";
5270 break;
5271 case SECCAT_DATA_REL_RO_LOCAL:
5272 sname = ".ldata.rel.ro.local";
5273 break;
5274 case SECCAT_BSS:
5275 sname = ".lbss";
5276 flags |= SECTION_BSS;
5277 break;
5278 case SECCAT_RODATA:
5279 case SECCAT_RODATA_MERGE_STR:
5280 case SECCAT_RODATA_MERGE_STR_INIT:
5281 case SECCAT_RODATA_MERGE_CONST:
5282 sname = ".lrodata";
5283 flags = 0;
5284 break;
5285 case SECCAT_SRODATA:
5286 case SECCAT_SDATA:
5287 case SECCAT_SBSS:
5288 gcc_unreachable ();
5289 case SECCAT_TEXT:
5290 case SECCAT_TDATA:
5291 case SECCAT_TBSS:
5292 /* We don't split these for the medium model. Place them into
5293 default sections and hope for the best. */
5294 break;
5296 if (sname)
5298 /* We might get called with string constants, but get_named_section
5299 doesn't like them as they are not DECLs. Also, we need to set
5300 flags in that case. */
5301 if (!DECL_P (decl))
5302 return get_section (sname, flags, NULL);
5303 return get_named_section (decl, sname, reloc);
5306 return default_elf_select_section (decl, reloc, align);
5309 /* Select a set of attributes for section NAME based on the properties
5310 of DECL and whether or not RELOC indicates that DECL's initializer
5311 might contain runtime relocations. */
5313 static unsigned int ATTRIBUTE_UNUSED
5314 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5316 unsigned int flags = default_section_type_flags (decl, name, reloc);
5318 if (decl == NULL_TREE
5319 && (strcmp (name, ".ldata.rel.ro") == 0
5320 || strcmp (name, ".ldata.rel.ro.local") == 0))
5321 flags |= SECTION_RELRO;
5323 if (strcmp (name, ".lbss") == 0
5324 || strncmp (name, ".lbss.", 6) == 0
5325 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5326 flags |= SECTION_BSS;
5328 return flags;
5331 /* Build up a unique section name, expressed as a
5332 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5333 RELOC indicates whether the initial value of EXP requires
5334 link-time relocations. */
5336 static void ATTRIBUTE_UNUSED
5337 x86_64_elf_unique_section (tree decl, int reloc)
5339 if (ix86_in_large_data_p (decl))
5341 const char *prefix = NULL;
5342 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5343 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5345 switch (categorize_decl_for_section (decl, reloc))
5347 case SECCAT_DATA:
5348 case SECCAT_DATA_REL:
5349 case SECCAT_DATA_REL_LOCAL:
5350 case SECCAT_DATA_REL_RO:
5351 case SECCAT_DATA_REL_RO_LOCAL:
5352 prefix = one_only ? ".ld" : ".ldata";
5353 break;
5354 case SECCAT_BSS:
5355 prefix = one_only ? ".lb" : ".lbss";
5356 break;
5357 case SECCAT_RODATA:
5358 case SECCAT_RODATA_MERGE_STR:
5359 case SECCAT_RODATA_MERGE_STR_INIT:
5360 case SECCAT_RODATA_MERGE_CONST:
5361 prefix = one_only ? ".lr" : ".lrodata";
5362 break;
5363 case SECCAT_SRODATA:
5364 case SECCAT_SDATA:
5365 case SECCAT_SBSS:
5366 gcc_unreachable ();
5367 case SECCAT_TEXT:
5368 case SECCAT_TDATA:
5369 case SECCAT_TBSS:
5370 /* We don't split these for the medium model. Place them into
5371 default sections and hope for the best. */
5372 break;
5374 if (prefix)
5376 const char *name, *linkonce;
5377 char *string;
5379 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5380 name = targetm.strip_name_encoding (name);
5382 /* If we're using one_only, then there needs to be a .gnu.linkonce
5383 prefix to the section name. */
5384 linkonce = one_only ? ".gnu.linkonce" : "";
5386 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5388 set_decl_section_name (decl, string);
5389 return;
5392 default_unique_section (decl, reloc);
5395 #ifdef COMMON_ASM_OP
5396 /* This says how to output assembler code to declare an
5397 uninitialized external linkage data object.
5399 For medium model x86-64 we need to use the .largecomm directive for
5400 large objects. */
5401 void
5402 x86_elf_aligned_common (FILE *file,
5403 const char *name, unsigned HOST_WIDE_INT size,
5404 int align)
5406 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5407 && size > (unsigned int)ix86_section_threshold)
5408 fputs ("\t.largecomm\t", file);
5409 else
5410 fputs (COMMON_ASM_OP, file);
5411 assemble_name (file, name);
5412 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5413 size, align / BITS_PER_UNIT);
5415 #endif
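/* For illustration (a sketch; exact operands depend on the assembler): a
   1 MiB object compiled with -mcmodel=medium and the default
   -mlarge-data-threshold is emitted via the large-data path above roughly as

     .largecomm  big_array,1048576,32

   while a small object keeps the ordinary form

     .comm       small_var,4,4  */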
5417 /* Utility function for targets to use in implementing
5418 ASM_OUTPUT_ALIGNED_BSS. */
5420 void
5421 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5422 unsigned HOST_WIDE_INT size, int align)
5424 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5425 && size > (unsigned int)ix86_section_threshold)
5426 switch_to_section (get_named_section (decl, ".lbss", 0));
5427 else
5428 switch_to_section (bss_section);
5429 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5430 #ifdef ASM_DECLARE_OBJECT_NAME
5431 last_assemble_variable_decl = decl;
5432 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5433 #else
5434 /* The standard thing is just to output a label for the object. */
5435 ASM_OUTPUT_LABEL (file, name);
5436 #endif /* ASM_DECLARE_OBJECT_NAME */
5437 ASM_OUTPUT_SKIP (file, size ? size : 1);
5440 /* Decide whether we must probe the stack before any space allocation
5441 on this target. It's essentially TARGET_STACK_PROBE except when
5442 -fstack-check causes the stack to be already probed differently. */
5444 bool
5445 ix86_target_stack_probe (void)
5447 /* Do not probe the stack twice if static stack checking is enabled. */
5448 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5449 return false;
5451 return TARGET_STACK_PROBE;
5454 /* Decide whether we can make a sibling call to a function. DECL is the
5455 declaration of the function being targeted by the call and EXP is the
5456 CALL_EXPR representing the call. */
5458 static bool
5459 ix86_function_ok_for_sibcall (tree decl, tree exp)
5461 tree type, decl_or_type;
5462 rtx a, b;
5464 /* If we are generating position-independent code, we cannot sibcall
5465 optimize any indirect call, or a direct call to a global function,
5466 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5467 if (!TARGET_MACHO
5468 && !TARGET_64BIT
5469 && flag_pic
5470 && (!decl || !targetm.binds_local_p (decl)))
5471 return false;
5473 /* If we need to align the outgoing stack, then sibcalling would
5474 unalign the stack, which may break the called function. */
5475 if (ix86_minimum_incoming_stack_boundary (true)
5476 < PREFERRED_STACK_BOUNDARY)
5477 return false;
5479 if (decl)
5481 decl_or_type = decl;
5482 type = TREE_TYPE (decl);
5484 else
5486 /* We're looking at the CALL_EXPR, we need the type of the function. */
5487 type = CALL_EXPR_FN (exp); /* pointer expression */
5488 type = TREE_TYPE (type); /* pointer type */
5489 type = TREE_TYPE (type); /* function type */
5490 decl_or_type = type;
5493 /* Check that the return value locations are the same. Like
5494 if we are returning floats on the 80387 register stack, we cannot
5495 make a sibcall from a function that doesn't return a float to a
5496 function that does or, conversely, from a function that does return
5497 a float to a function that doesn't; the necessary stack adjustment
5498 would not be executed. This is also the place we notice
5499 differences in the return value ABI. Note that it is ok for one
5500 of the functions to have void return type as long as the return
5501 value of the other is passed in a register. */
5502 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5503 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5504 cfun->decl, false);
5505 if (STACK_REG_P (a) || STACK_REG_P (b))
5507 if (!rtx_equal_p (a, b))
5508 return false;
5510 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5512 else if (!rtx_equal_p (a, b))
5513 return false;
5515 if (TARGET_64BIT)
5517 /* The SYSV ABI has more call-clobbered registers;
5518 disallow sibcalls from MS to SYSV. */
5519 if (cfun->machine->call_abi == MS_ABI
5520 && ix86_function_type_abi (type) == SYSV_ABI)
5521 return false;
5523 else
5525 /* If this call is indirect, we'll need to be able to use a
5526 call-clobbered register for the address of the target function.
5527 Make sure that all such registers are not used for passing
5528 parameters. Note that DLLIMPORT functions are indirect. */
5529 if (!decl
5530 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5532 if (ix86_function_regparm (type, NULL) >= 3)
5534 /* ??? Need to count the actual number of registers to be used,
5535 not the possible number of registers. Fix later. */
5536 return false;
5541 /* Otherwise okay. That also includes certain types of indirect calls. */
5542 return true;
5545 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5546 and "sseregparm" calling convention attributes;
5547 arguments as in struct attribute_spec.handler. */
5549 static tree
5550 ix86_handle_cconv_attribute (tree *node, tree name,
5551 tree args,
5552 int,
5553 bool *no_add_attrs)
5555 if (TREE_CODE (*node) != FUNCTION_TYPE
5556 && TREE_CODE (*node) != METHOD_TYPE
5557 && TREE_CODE (*node) != FIELD_DECL
5558 && TREE_CODE (*node) != TYPE_DECL)
5560 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5561 name);
5562 *no_add_attrs = true;
5563 return NULL_TREE;
5566 /* Can combine regparm with all attributes but fastcall and thiscall. */
5567 if (is_attribute_p ("regparm", name))
5569 tree cst;
5571 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5573 error ("fastcall and regparm attributes are not compatible");
5576 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5578 error ("regparam and thiscall attributes are not compatible");
5581 cst = TREE_VALUE (args);
5582 if (TREE_CODE (cst) != INTEGER_CST)
5584 warning (OPT_Wattributes,
5585 "%qE attribute requires an integer constant argument",
5586 name);
5587 *no_add_attrs = true;
5589 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5591 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5592 name, REGPARM_MAX);
5593 *no_add_attrs = true;
5596 return NULL_TREE;
5599 if (TARGET_64BIT)
5601 /* Do not warn when emulating the MS ABI. */
5602 if ((TREE_CODE (*node) != FUNCTION_TYPE
5603 && TREE_CODE (*node) != METHOD_TYPE)
5604 || ix86_function_type_abi (*node) != MS_ABI)
5605 warning (OPT_Wattributes, "%qE attribute ignored",
5606 name);
5607 *no_add_attrs = true;
5608 return NULL_TREE;
5611 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5612 if (is_attribute_p ("fastcall", name))
5614 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5616 error ("fastcall and cdecl attributes are not compatible");
5618 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5620 error ("fastcall and stdcall attributes are not compatible");
5622 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5624 error ("fastcall and regparm attributes are not compatible");
5626 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5628 error ("fastcall and thiscall attributes are not compatible");
5632 /* Can combine stdcall with fastcall (redundant), regparm and
5633 sseregparm. */
5634 else if (is_attribute_p ("stdcall", name))
5636 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5638 error ("stdcall and cdecl attributes are not compatible");
5640 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5642 error ("stdcall and fastcall attributes are not compatible");
5644 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5646 error ("stdcall and thiscall attributes are not compatible");
5650 /* Can combine cdecl with regparm and sseregparm. */
5651 else if (is_attribute_p ("cdecl", name))
5653 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5655 error ("stdcall and cdecl attributes are not compatible");
5657 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5659 error ("fastcall and cdecl attributes are not compatible");
5661 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5663 error ("cdecl and thiscall attributes are not compatible");
5666 else if (is_attribute_p ("thiscall", name))
5668 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5669 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5670 name);
5671 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5673 error ("stdcall and thiscall attributes are not compatible");
5675 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5677 error ("fastcall and thiscall attributes are not compatible");
5679 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5681 error ("cdecl and thiscall attributes are not compatible");
5685 /* Can combine sseregparm with all attributes. */
5687 return NULL_TREE;
5690 /* The transactional memory builtins are implicitly regparm or fastcall
5691 depending on the ABI. Override the generic do-nothing attribute that
5692 these builtins were declared with, and replace it with one of the two
5693 attributes that we expect elsewhere. */
5695 static tree
5696 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5697 int flags, bool *no_add_attrs)
5699 tree alt;
5701 /* In no case do we want to add the placeholder attribute. */
5702 *no_add_attrs = true;
5704 /* The 64-bit ABI is unchanged for transactional memory. */
5705 if (TARGET_64BIT)
5706 return NULL_TREE;
5708 /* ??? Is there a better way to validate 32-bit windows? We have
5709 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5710 if (CHECK_STACK_LIMIT > 0)
5711 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5712 else
5714 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5715 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5717 decl_attributes (node, alt, flags);
5719 return NULL_TREE;
5722 /* This function determines the calling convention from TYPE. */
5724 unsigned int
5725 ix86_get_callcvt (const_tree type)
5727 unsigned int ret = 0;
5728 bool is_stdarg;
5729 tree attrs;
5731 if (TARGET_64BIT)
5732 return IX86_CALLCVT_CDECL;
5734 attrs = TYPE_ATTRIBUTES (type);
5735 if (attrs != NULL_TREE)
5737 if (lookup_attribute ("cdecl", attrs))
5738 ret |= IX86_CALLCVT_CDECL;
5739 else if (lookup_attribute ("stdcall", attrs))
5740 ret |= IX86_CALLCVT_STDCALL;
5741 else if (lookup_attribute ("fastcall", attrs))
5742 ret |= IX86_CALLCVT_FASTCALL;
5743 else if (lookup_attribute ("thiscall", attrs))
5744 ret |= IX86_CALLCVT_THISCALL;
5746 /* Regparm isn't allowed for thiscall and fastcall. */
5747 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5749 if (lookup_attribute ("regparm", attrs))
5750 ret |= IX86_CALLCVT_REGPARM;
5751 if (lookup_attribute ("sseregparm", attrs))
5752 ret |= IX86_CALLCVT_SSEREGPARM;
5755 if (IX86_BASE_CALLCVT(ret) != 0)
5756 return ret;
5759 is_stdarg = stdarg_p (type);
5760 if (TARGET_RTD && !is_stdarg)
5761 return IX86_CALLCVT_STDCALL | ret;
5763 if (ret != 0
5764 || is_stdarg
5765 || TREE_CODE (type) != METHOD_TYPE
5766 || ix86_function_type_abi (type) != MS_ABI)
5767 return IX86_CALLCVT_CDECL | ret;
5769 return IX86_CALLCVT_THISCALL;
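/* Sketch of the mapping above for common 32-bit declarations (assuming
   no -mrtd and the SYSV ABI):

     void f (int) __attribute__((stdcall));    ->  IX86_CALLCVT_STDCALL
     void g (int) __attribute__((fastcall));   ->  IX86_CALLCVT_FASTCALL
     void h (int);                             ->  IX86_CALLCVT_CDECL
     void v (int, ...);                        ->  IX86_CALLCVT_CDECL  */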
5772 /* Return 0 if the attributes for two types are incompatible, 1 if they
5773 are compatible, and 2 if they are nearly compatible (which causes a
5774 warning to be generated). */
5776 static int
5777 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5779 unsigned int ccvt1, ccvt2;
5781 if (TREE_CODE (type1) != FUNCTION_TYPE
5782 && TREE_CODE (type1) != METHOD_TYPE)
5783 return 1;
5785 ccvt1 = ix86_get_callcvt (type1);
5786 ccvt2 = ix86_get_callcvt (type2);
5787 if (ccvt1 != ccvt2)
5788 return 0;
5789 if (ix86_function_regparm (type1, NULL)
5790 != ix86_function_regparm (type2, NULL))
5791 return 0;
5793 return 1;
5796 /* Return the regparm value for a function with the indicated TYPE and DECL.
5797 DECL may be NULL when calling the function indirectly
5798 or considering a libcall. */
5800 static int
5801 ix86_function_regparm (const_tree type, const_tree decl)
5803 tree attr;
5804 int regparm;
5805 unsigned int ccvt;
5807 if (TARGET_64BIT)
5808 return (ix86_function_type_abi (type) == SYSV_ABI
5809 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5810 ccvt = ix86_get_callcvt (type);
5811 regparm = ix86_regparm;
5813 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5815 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5816 if (attr)
5818 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5819 return regparm;
5822 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5823 return 2;
5824 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5825 return 1;
5827 /* Use register calling convention for local functions when possible. */
5828 if (decl
5829 && TREE_CODE (decl) == FUNCTION_DECL)
5831 cgraph_node *target = cgraph_node::get (decl);
5832 if (target)
5833 target = target->function_symbol ();
5835 /* Caller and callee must agree on the calling convention, so
5836 checking just optimize here would mean that with
5837 __attribute__((optimize (...))) the caller could use the regparm convention
5838 and the callee not, or vice versa. Instead look at whether the callee
5839 itself is optimized or not. */
5840 if (target && opt_for_fn (target->decl, optimize)
5841 && !(profile_flag && !flag_fentry))
5843 cgraph_local_info *i = &target->local;
5844 if (i && i->local && i->can_change_signature)
5846 int local_regparm, globals = 0, regno;
5848 /* Make sure no regparm register is taken by a
5849 fixed register variable. */
5850 for (local_regparm = 0; local_regparm < REGPARM_MAX;
5851 local_regparm++)
5852 if (fixed_regs[local_regparm])
5853 break;
5855 /* We don't want to use regparm(3) for nested functions as
5856 these use a static chain pointer in the third argument. */
5857 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
5858 local_regparm = 2;
5860 /* Save a register for the split stack. */
5861 if (local_regparm == 3 && flag_split_stack)
5862 local_regparm = 2;
5864 /* Each fixed register usage increases register pressure,
5865 so fewer registers should be used for argument passing.
5866 This functionality can be overridden by an explicit
5867 regparm value. */
5868 for (regno = AX_REG; regno <= DI_REG; regno++)
5869 if (fixed_regs[regno])
5870 globals++;
5872 local_regparm
5873 = globals < local_regparm ? local_regparm - globals : 0;
5875 if (local_regparm > regparm)
5876 regparm = local_regparm;
5881 return regparm;
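/* For illustration (32-bit only, a sketch): with

     int __attribute__((regparm (2))) add (int a, int b);

   the first two integer arguments are passed in %eax and %edx instead of
   on the stack; fastcall yields 2 (in %ecx/%edx) and thiscall yields 1.  */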
5884 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5885 DFmode (2) arguments in SSE registers for a function with the
5886 indicated TYPE and DECL. DECL may be NULL when calling function
5887 indirectly or considering a libcall. Otherwise return 0. */
5889 static int
5890 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5892 gcc_assert (!TARGET_64BIT);
5894 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5895 by the sseregparm attribute. */
5896 if (TARGET_SSEREGPARM
5897 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5899 if (!TARGET_SSE)
5901 if (warn)
5903 if (decl)
5904 error ("calling %qD with attribute sseregparm without "
5905 "SSE/SSE2 enabled", decl);
5906 else
5907 error ("calling %qT with attribute sseregparm without "
5908 "SSE/SSE2 enabled", type);
5910 return 0;
5913 return 2;
5916 if (!decl)
5917 return 0;
5919 cgraph_node *target = cgraph_node::get (decl);
5920 if (target)
5921 target = target->function_symbol ();
5923 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5924 (and DFmode for SSE2) arguments in SSE registers. */
5925 if (target
5926 /* TARGET_SSE_MATH */
5927 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
5928 && opt_for_fn (target->decl, optimize)
5929 && !(profile_flag && !flag_fentry))
5931 cgraph_local_info *i = &target->local;
5932 if (i && i->local && i->can_change_signature)
5934 /* Refuse to produce wrong code when a local function with SSE enabled
5935 is called from an SSE-disabled function.
5936 We may work hard to work out these scenarios but hopefully
5937 it does not matter in practice. */
5938 if (!TARGET_SSE && warn)
5940 error ("calling %qD with SSE caling convention without "
5941 "SSE/SSE2 enabled", decl);
5942 return 0;
5944 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
5945 ->x_ix86_isa_flags) ? 2 : 1;
5949 return 0;
5952 /* Return true if EAX is live at the start of the function. Used by
5953 ix86_expand_prologue to determine if we need special help before
5954 calling allocate_stack_worker. */
5956 static bool
5957 ix86_eax_live_at_start_p (void)
5959 /* Cheat. Don't bother working forward from ix86_function_regparm
5960 to the function type to whether an actual argument is located in
5961 eax. Instead just look at cfg info, which is still close enough
5962 to correct at this point. This gives false positives for broken
5963 functions that might use uninitialized data that happens to be
5964 allocated in eax, but who cares? */
5965 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5968 static bool
5969 ix86_keep_aggregate_return_pointer (tree fntype)
5971 tree attr;
5973 if (!TARGET_64BIT)
5975 attr = lookup_attribute ("callee_pop_aggregate_return",
5976 TYPE_ATTRIBUTES (fntype));
5977 if (attr)
5978 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5980 /* For 32-bit MS-ABI the default is to keep aggregate
5981 return pointer. */
5982 if (ix86_function_type_abi (fntype) == MS_ABI)
5983 return true;
5985 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5988 /* Value is the number of bytes of arguments automatically
5989 popped when returning from a subroutine call.
5990 FUNDECL is the declaration node of the function (as a tree),
5991 FUNTYPE is the data type of the function (as a tree),
5992 or for a library call it is an identifier node for the subroutine name.
5993 SIZE is the number of bytes of arguments passed on the stack.
5995 On the 80386, the RTD insn may be used to pop them if the number
5996 of args is fixed, but if the number is variable then the caller
5997 must pop them all. RTD can't be used for library calls now
5998 because the library is compiled with the Unix compiler.
5999 Use of RTD is a selectable option, since it is incompatible with
6000 standard Unix calling sequences. If the option is not selected,
6001 the caller must always pop the args.
6003 The attribute stdcall is equivalent to RTD on a per module basis. */
6005 static int
6006 ix86_return_pops_args (tree fundecl, tree funtype, int size)
6008 unsigned int ccvt;
6010 /* None of the 64-bit ABIs pop arguments. */
6011 if (TARGET_64BIT)
6012 return 0;
6014 ccvt = ix86_get_callcvt (funtype);
6016 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6017 | IX86_CALLCVT_THISCALL)) != 0
6018 && ! stdarg_p (funtype))
6019 return size;
6021 /* Lose any fake structure return argument if it is passed on the stack. */
6022 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6023 && !ix86_keep_aggregate_return_pointer (funtype))
6025 int nregs = ix86_function_regparm (funtype, fundecl);
6026 if (nregs == 0)
6027 return GET_MODE_SIZE (Pmode);
6030 return 0;
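/* Sketch of the effect above: for

     void __attribute__((stdcall)) f (int a, int b);

   a 32-bit callee pops its own 8 bytes of arguments and returns with
   "ret $8", whereas a plain cdecl function returns with "ret" and the
   caller adjusts %esp itself.  */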
6033 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
6035 static bool
6036 ix86_legitimate_combined_insn (rtx_insn *insn)
6038 /* Check operand constraints in case hard registers were propagated
6039 into insn pattern. This check prevents combine pass from
6040 generating insn patterns with invalid hard register operands.
6041 These invalid insns can eventually confuse reload to error out
6042 with a spill failure. See also PRs 46829 and 46843. */
6043 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
6045 int i;
6047 extract_insn (insn);
6048 preprocess_constraints (insn);
6050 int n_operands = recog_data.n_operands;
6051 int n_alternatives = recog_data.n_alternatives;
6052 for (i = 0; i < n_operands; i++)
6054 rtx op = recog_data.operand[i];
6055 machine_mode mode = GET_MODE (op);
6056 const operand_alternative *op_alt;
6057 int offset = 0;
6058 bool win;
6059 int j;
6061 /* For pre-AVX disallow unaligned loads/stores where the
6062 instructions don't support it. */
6063 if (!TARGET_AVX
6064 && VECTOR_MODE_P (GET_MODE (op))
6065 && misaligned_operand (op, GET_MODE (op)))
6067 int min_align = get_attr_ssememalign (insn);
6068 if (min_align == 0)
6069 return false;
6072 /* A unary operator may be accepted by the predicate, but it
6073 is irrelevant for matching constraints. */
6074 if (UNARY_P (op))
6075 op = XEXP (op, 0);
6077 if (GET_CODE (op) == SUBREG)
6079 if (REG_P (SUBREG_REG (op))
6080 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6081 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6082 GET_MODE (SUBREG_REG (op)),
6083 SUBREG_BYTE (op),
6084 GET_MODE (op));
6085 op = SUBREG_REG (op);
6088 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6089 continue;
6091 op_alt = recog_op_alt;
6093 /* Operand has no constraints, anything is OK. */
6094 win = !n_alternatives;
6096 alternative_mask preferred = get_preferred_alternatives (insn);
6097 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6099 if (!TEST_BIT (preferred, j))
6100 continue;
6101 if (op_alt[i].anything_ok
6102 || (op_alt[i].matches != -1
6103 && operands_match_p
6104 (recog_data.operand[i],
6105 recog_data.operand[op_alt[i].matches]))
6106 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6108 win = true;
6109 break;
6113 if (!win)
6114 return false;
6118 return true;
6121 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
6123 static unsigned HOST_WIDE_INT
6124 ix86_asan_shadow_offset (void)
6126 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6127 : HOST_WIDE_INT_C (0x7fff8000))
6128 : (HOST_WIDE_INT_1 << 29);
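/* For reference (a sketch, not used by the hook above): libsanitizer maps
   an application address ADDR to its shadow byte roughly as

     shadow = (ADDR >> 3) + ix86_asan_shadow_offset ()

   so the constants returned here simply park the shadow region in an
   otherwise unused part of the address space for each pointer model.  */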
6131 /* Argument support functions. */
6133 /* Return true when register may be used to pass function parameters. */
6134 bool
6135 ix86_function_arg_regno_p (int regno)
6137 int i;
6138 const int *parm_regs;
6140 if (TARGET_MPX && BND_REGNO_P (regno))
6141 return true;
6143 if (!TARGET_64BIT)
6145 if (TARGET_MACHO)
6146 return (regno < REGPARM_MAX
6147 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6148 else
6149 return (regno < REGPARM_MAX
6150 || (TARGET_MMX && MMX_REGNO_P (regno)
6151 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6152 || (TARGET_SSE && SSE_REGNO_P (regno)
6153 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6156 if (TARGET_SSE && SSE_REGNO_P (regno)
6157 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6158 return true;
6160 /* TODO: The function should depend on current function ABI but
6161 builtins.c would need updating then. Therefore we use the
6162 default ABI. */
6164 /* RAX is used as hidden argument to va_arg functions. */
6165 if (ix86_abi == SYSV_ABI && regno == AX_REG)
6166 return true;
6168 if (ix86_abi == MS_ABI)
6169 parm_regs = x86_64_ms_abi_int_parameter_registers;
6170 else
6171 parm_regs = x86_64_int_parameter_registers;
6172 for (i = 0; i < (ix86_abi == MS_ABI
6173 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6174 if (regno == parm_regs[i])
6175 return true;
6176 return false;
6179 /* Return true if we do not know how to pass TYPE solely in registers. */
6181 static bool
6182 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6184 if (must_pass_in_stack_var_size_or_pad (mode, type))
6185 return true;
6187 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6188 The layout_type routine is crafty and tries to trick us into passing
6189 currently unsupported vector types on the stack by using TImode. */
6190 return (!TARGET_64BIT && mode == TImode
6191 && type && TREE_CODE (type) != VECTOR_TYPE);
6194 /* Return the size, in bytes, of the area reserved for arguments passed
6195 in registers for the function represented by FNDECL, depending on the
6196 ABI format used. */
6197 int
6198 ix86_reg_parm_stack_space (const_tree fndecl)
6200 enum calling_abi call_abi = SYSV_ABI;
6201 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6202 call_abi = ix86_function_abi (fndecl);
6203 else
6204 call_abi = ix86_function_type_abi (fndecl);
6205 if (TARGET_64BIT && call_abi == MS_ABI)
6206 return 32;
6207 return 0;
6210 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
6211 call ABI used. */
6212 enum calling_abi
6213 ix86_function_type_abi (const_tree fntype)
6215 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6217 enum calling_abi abi = ix86_abi;
6218 if (abi == SYSV_ABI)
6220 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6222 if (TARGET_X32)
6224 static bool warned = false;
6225 if (!warned)
6227 error ("X32 does not support ms_abi attribute");
6228 warned = true;
6231 abi = MS_ABI;
6234 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6235 abi = SYSV_ABI;
6236 return abi;
6238 return ix86_abi;
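/* For illustration (a sketch): on a 64-bit SYSV target,

     void __attribute__((ms_abi)) winfunc (int a, int b, int c, int d);

   is reported as MS_ABI here, so its first four integer arguments are
   passed in %rcx, %rdx, %r8 and %r9 rather than %rdi, %rsi, %rdx, %rcx.  */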
6241 /* We add this as a workaround in order to use the libc_has_function
6242 hook in i386.md. */
6243 bool
6244 ix86_libc_has_function (enum function_class fn_class)
6246 return targetm.libc_has_function (fn_class);
6249 static bool
6250 ix86_function_ms_hook_prologue (const_tree fn)
6252 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6254 if (decl_function_context (fn) != NULL_TREE)
6255 error_at (DECL_SOURCE_LOCATION (fn),
6256 "ms_hook_prologue is not compatible with nested function");
6257 else
6258 return true;
6260 return false;
6263 static enum calling_abi
6264 ix86_function_abi (const_tree fndecl)
6266 if (! fndecl)
6267 return ix86_abi;
6268 return ix86_function_type_abi (TREE_TYPE (fndecl));
6271 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
6272 call ABI used. */
6273 enum calling_abi
6274 ix86_cfun_abi (void)
6276 if (! cfun)
6277 return ix86_abi;
6278 return cfun->machine->call_abi;
6281 /* Write the extra assembler code needed to declare a function properly. */
6283 void
6284 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6285 tree decl)
6287 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6289 if (is_ms_hook)
6291 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6292 unsigned int filler_cc = 0xcccccccc;
6294 for (i = 0; i < filler_count; i += 4)
6295 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6298 #ifdef SUBTARGET_ASM_UNWIND_INIT
6299 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6300 #endif
6302 ASM_OUTPUT_LABEL (asm_out_file, fname);
6304 /* Output magic byte marker, if hot-patch attribute is set. */
6305 if (is_ms_hook)
6307 if (TARGET_64BIT)
6309 /* leaq [%rsp + 0], %rsp */
6310 asm_fprintf (asm_out_file, ASM_BYTE
6311 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6313 else
6315 /* movl.s %edi, %edi
6316 push %ebp
6317 movl.s %esp, %ebp */
6318 asm_fprintf (asm_out_file, ASM_BYTE
6319 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6324 /* regclass.c */
6325 extern void init_regs (void);
6327 /* Implementation of the call ABI switching target hook. The call
6328 register sets specific to FNDECL are set up. See also
6329 ix86_conditional_register_usage for more details. */
6330 void
6331 ix86_call_abi_override (const_tree fndecl)
6333 if (fndecl == NULL_TREE)
6334 cfun->machine->call_abi = ix86_abi;
6335 else
6336 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6339 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6340 expensive re-initialization of init_regs each time we switch function context,
6341 since this is needed only during RTL expansion. */
6342 static void
6343 ix86_maybe_switch_abi (void)
6345 if (TARGET_64BIT &&
6346 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6347 reinit_regs ();
6350 /* Return 1 if pseudo register should be created and used to hold
6351 GOT address for PIC code. */
6352 bool
6353 ix86_use_pseudo_pic_reg (void)
6355 if ((TARGET_64BIT
6356 && (ix86_cmodel == CM_SMALL_PIC
6357 || TARGET_PECOFF))
6358 || !flag_pic)
6359 return false;
6360 return true;
6363 /* Initialize large model PIC register. */
6365 static void
6366 ix86_init_large_pic_reg (unsigned int tmp_regno)
6368 rtx_code_label *label;
6369 rtx tmp_reg;
6371 gcc_assert (Pmode == DImode);
6372 label = gen_label_rtx ();
6373 emit_label (label);
6374 LABEL_PRESERVE_P (label) = 1;
6375 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6376 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6377 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6378 label));
6379 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6380 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6381 pic_offset_table_rtx, tmp_reg));
6384 /* Create and initialize PIC register if required. */
6385 static void
6386 ix86_init_pic_reg (void)
6388 edge entry_edge;
6389 rtx_insn *seq;
6391 if (!ix86_use_pseudo_pic_reg ())
6392 return;
6394 start_sequence ();
6396 if (TARGET_64BIT)
6398 if (ix86_cmodel == CM_LARGE_PIC)
6399 ix86_init_large_pic_reg (R11_REG);
6400 else
6401 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6403 else
6405 /* If there is a future mcount call in the function, it is more profitable
6406 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
6407 rtx reg = crtl->profile
6408 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6409 : pic_offset_table_rtx;
6410 rtx_insn *insn = emit_insn (gen_set_got (reg));
6411 RTX_FRAME_RELATED_P (insn) = 1;
6412 if (crtl->profile)
6413 emit_move_insn (pic_offset_table_rtx, reg);
6414 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6417 seq = get_insns ();
6418 end_sequence ();
6420 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6421 insert_insn_on_edge (seq, entry_edge);
6422 commit_one_edge_insertion (entry_edge);
6425 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6426 for a call to a function whose data type is FNTYPE.
6427 For a library call, FNTYPE is 0. */
6429 void
6430 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6431 tree fntype, /* tree ptr for function decl */
6432 rtx libname, /* SYMBOL_REF of library name or 0 */
6433 tree fndecl,
6434 int caller)
6436 struct cgraph_local_info *i = NULL;
6437 struct cgraph_node *target = NULL;
6439 memset (cum, 0, sizeof (*cum));
6441 if (fndecl)
6443 target = cgraph_node::get (fndecl);
6444 if (target)
6446 target = target->function_symbol ();
6447 i = cgraph_node::local_info (target->decl);
6448 cum->call_abi = ix86_function_abi (target->decl);
6450 else
6451 cum->call_abi = ix86_function_abi (fndecl);
6453 else
6454 cum->call_abi = ix86_function_type_abi (fntype);
6456 cum->caller = caller;
6458 /* Set up the number of registers to use for passing arguments. */
6459 cum->nregs = ix86_regparm;
6460 if (TARGET_64BIT)
6462 cum->nregs = (cum->call_abi == SYSV_ABI
6463 ? X86_64_REGPARM_MAX
6464 : X86_64_MS_REGPARM_MAX);
6466 if (TARGET_SSE)
6468 cum->sse_nregs = SSE_REGPARM_MAX;
6469 if (TARGET_64BIT)
6471 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6472 ? X86_64_SSE_REGPARM_MAX
6473 : X86_64_MS_SSE_REGPARM_MAX);
6476 if (TARGET_MMX)
6477 cum->mmx_nregs = MMX_REGPARM_MAX;
6478 cum->warn_avx512f = true;
6479 cum->warn_avx = true;
6480 cum->warn_sse = true;
6481 cum->warn_mmx = true;
6483 /* Because the type might mismatch between caller and callee, we need to
6484 use the actual type of the function for local calls.
6485 FIXME: cgraph_analyze can be told to actually record whether a function uses
6486 va_start, so for local functions maybe_vaarg could be made more aggressive,
6487 helping K&R code.
6488 FIXME: once the type system is fixed, we won't need this code anymore. */
6489 if (i && i->local && i->can_change_signature)
6490 fntype = TREE_TYPE (target->decl);
6491 cum->stdarg = stdarg_p (fntype);
6492 cum->maybe_vaarg = (fntype
6493 ? (!prototype_p (fntype) || stdarg_p (fntype))
6494 : !libname);
6496 cum->bnd_regno = FIRST_BND_REG;
6497 cum->bnds_in_bt = 0;
6498 cum->force_bnd_pass = 0;
6500 if (!TARGET_64BIT)
6502 /* If there are variable arguments, then we won't pass anything
6503 in registers in 32-bit mode. */
6504 if (stdarg_p (fntype))
6506 cum->nregs = 0;
6507 cum->sse_nregs = 0;
6508 cum->mmx_nregs = 0;
6509 cum->warn_avx512f = false;
6510 cum->warn_avx = false;
6511 cum->warn_sse = false;
6512 cum->warn_mmx = false;
6513 return;
6516 /* Use ecx and edx registers if function has fastcall attribute,
6517 else look for regparm information. */
6518 if (fntype)
6520 unsigned int ccvt = ix86_get_callcvt (fntype);
6521 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6523 cum->nregs = 1;
6524 cum->fastcall = 1; /* Same first register as in fastcall. */
6526 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6528 cum->nregs = 2;
6529 cum->fastcall = 1;
6531 else
6532 cum->nregs = ix86_function_regparm (fntype, fndecl);
6535 /* Set up the number of SSE registers used for passing SFmode
6536 and DFmode arguments. Warn for mismatching ABI. */
6537 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
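/* Illustrative sketch added for exposition, not part of GCC: the register
   budget set up above, assuming the usual values of the *_REGPARM_MAX
   macros, works out to

     64-bit SYSV_ABI:  nregs = 6     (%rdi, %rsi, %rdx, %rcx, %r8, %r9)
                       sse_nregs = 8 (%xmm0-%xmm7)
     64-bit MS_ABI:    nregs = 4     (%rcx, %rdx, %r8, %r9)
                       sse_nregs = 4 (%xmm0-%xmm3)
     32-bit:           nregs = ix86_regparm (0 unless regparm, fastcall or
                       thiscall applies); SSE/MMX registers are used only
                       for vector arguments and, with sseregparm, for
                       scalar floats.  */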
6541 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6542 But in the case of vector types, it is some vector mode.
6544 When we have only some of our vector isa extensions enabled, then there
6545 are some modes for which vector_mode_supported_p is false. For these
6546 modes, the generic vector support in gcc will choose some non-vector mode
6547 in order to implement the type. By computing the natural mode, we'll
6548 select the proper ABI location for the operand and not depend on whatever
6549 the middle-end decides to do with these vector types.
6551 The middle-end can't deal with vector types > 16 bytes. In this
6552 case, we return the original mode and warn about the ABI change if CUM isn't
6553 NULL.
6555 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6556 available for the function return value. */
6558 static machine_mode
6559 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6560 bool in_return)
6562 machine_mode mode = TYPE_MODE (type);
6564 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6566 HOST_WIDE_INT size = int_size_in_bytes (type);
6567 if ((size == 8 || size == 16 || size == 32 || size == 64)
6568 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6569 && TYPE_VECTOR_SUBPARTS (type) > 1)
6571 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6573 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6574 mode = MIN_MODE_VECTOR_FLOAT;
6575 else
6576 mode = MIN_MODE_VECTOR_INT;
6578 /* Get the mode which has this inner mode and number of units. */
6579 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6580 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6581 && GET_MODE_INNER (mode) == innermode)
6583 if (size == 64 && !TARGET_AVX512F)
6585 static bool warnedavx512f;
6586 static bool warnedavx512f_ret;
6588 if (cum && cum->warn_avx512f && !warnedavx512f)
6590 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6591 "without AVX512F enabled changes the ABI"))
6592 warnedavx512f = true;
6594 else if (in_return && !warnedavx512f_ret)
6596 if (warning (OPT_Wpsabi, "AVX512F vector return "
6597 "without AVX512F enabled changes the ABI"))
6598 warnedavx512f_ret = true;
6601 return TYPE_MODE (type);
6603 else if (size == 32 && !TARGET_AVX)
6605 static bool warnedavx;
6606 static bool warnedavx_ret;
6608 if (cum && cum->warn_avx && !warnedavx)
6610 if (warning (OPT_Wpsabi, "AVX vector argument "
6611 "without AVX enabled changes the ABI"))
6612 warnedavx = true;
6614 else if (in_return && !warnedavx_ret)
6616 if (warning (OPT_Wpsabi, "AVX vector return "
6617 "without AVX enabled changes the ABI"))
6618 warnedavx_ret = true;
6621 return TYPE_MODE (type);
6623 else if (((size == 8 && TARGET_64BIT) || size == 16)
6624 && !TARGET_SSE)
6626 static bool warnedsse;
6627 static bool warnedsse_ret;
6629 if (cum && cum->warn_sse && !warnedsse)
6631 if (warning (OPT_Wpsabi, "SSE vector argument "
6632 "without SSE enabled changes the ABI"))
6633 warnedsse = true;
6635 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6637 if (warning (OPT_Wpsabi, "SSE vector return "
6638 "without SSE enabled changes the ABI"))
6639 warnedsse_ret = true;
6642 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6644 static bool warnedmmx;
6645 static bool warnedmmx_ret;
6647 if (cum && cum->warn_mmx && !warnedmmx)
6649 if (warning (OPT_Wpsabi, "MMX vector argument "
6650 "without MMX enabled changes the ABI"))
6651 warnedmmx = true;
6653 else if (in_return && !warnedmmx_ret)
6655 if (warning (OPT_Wpsabi, "MMX vector return "
6656 "without MMX enabled changes the ABI"))
6657 warnedmmx_ret = true;
6660 return mode;
6663 gcc_unreachable ();
6667 return mode;
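/* Illustrative sketch added for exposition, not part of GCC: for a
   hypothetical generic vector type

       typedef int v8si __attribute__ ((vector_size (32)));

   type_natural_mode returns V8SImode when AVX is enabled, so such
   arguments get their proper AVX ABI location.  Without -mavx it falls
   back to TYPE_MODE (type) and, at most once per kind of mismatch, emits
   the -Wpsabi note that the ABI of the argument or return value differs
   from an AVX-enabled compilation.  */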
6670 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6671 this may not agree with the mode that the type system has chosen for the
6672 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6673 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6675 static rtx
6676 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6677 unsigned int regno)
6679 rtx tmp;
6681 if (orig_mode != BLKmode)
6682 tmp = gen_rtx_REG (orig_mode, regno);
6683 else
6685 tmp = gen_rtx_REG (mode, regno);
6686 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6687 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6690 return tmp;
6693 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
6694 of this code is to classify each 8-byte chunk of the incoming argument by register
6695 class and assign registers accordingly. */
6697 /* Return the union class of CLASS1 and CLASS2.
6698 See the x86-64 PS ABI for details. */
6700 static enum x86_64_reg_class
6701 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6703 /* Rule #1: If both classes are equal, this is the resulting class. */
6704 if (class1 == class2)
6705 return class1;
6707 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6708 the other class. */
6709 if (class1 == X86_64_NO_CLASS)
6710 return class2;
6711 if (class2 == X86_64_NO_CLASS)
6712 return class1;
6714 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6715 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6716 return X86_64_MEMORY_CLASS;
6718 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6719 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6720 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6721 return X86_64_INTEGERSI_CLASS;
6722 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6723 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6724 return X86_64_INTEGER_CLASS;
6726 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6727 MEMORY is used. */
6728 if (class1 == X86_64_X87_CLASS
6729 || class1 == X86_64_X87UP_CLASS
6730 || class1 == X86_64_COMPLEX_X87_CLASS
6731 || class2 == X86_64_X87_CLASS
6732 || class2 == X86_64_X87UP_CLASS
6733 || class2 == X86_64_COMPLEX_X87_CLASS)
6734 return X86_64_MEMORY_CLASS;
6736 /* Rule #6: Otherwise class SSE is used. */
6737 return X86_64_SSE_CLASS;
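/* Illustrative sketch added for exposition, not part of GCC: a few merges
   as performed by the rules above:

     merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS)
       -> X86_64_INTEGERSI_CLASS                 (rule #4)
     merge_classes (X86_64_SSESF_CLASS, X86_64_SSEDF_CLASS)
       -> X86_64_SSE_CLASS                       (rule #6)
     merge_classes (X86_64_X87_CLASS, X86_64_SSE_CLASS)
       -> X86_64_MEMORY_CLASS                    (rule #5)  */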
6740 /* Classify the argument of type TYPE and mode MODE.
6741 CLASSES will be filled by the register class used to pass each word
6742 of the operand. The number of words is returned. In case the parameter
6743 should be passed in memory, 0 is returned. As a special case for zero
6744 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6746 BIT_OFFSET is used internally for handling records and specifies the
6747 offset in bits modulo 512 to avoid overflow cases.
6749 See the x86-64 PS ABI for details.
6752 static int
6753 classify_argument (machine_mode mode, const_tree type,
6754 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6756 HOST_WIDE_INT bytes =
6757 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6758 int words
6759 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6761 /* Variable sized entities are always passed/returned in memory. */
6762 if (bytes < 0)
6763 return 0;
6765 if (mode != VOIDmode
6766 && targetm.calls.must_pass_in_stack (mode, type))
6767 return 0;
6769 if (type && AGGREGATE_TYPE_P (type))
6771 int i;
6772 tree field;
6773 enum x86_64_reg_class subclasses[MAX_CLASSES];
6775 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6776 if (bytes > 64)
6777 return 0;
6779 for (i = 0; i < words; i++)
6780 classes[i] = X86_64_NO_CLASS;
6782 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6783 signal the memory class, so handle it as a special case. */
6784 if (!words)
6786 classes[0] = X86_64_NO_CLASS;
6787 return 1;
6790 /* Classify each field of record and merge classes. */
6791 switch (TREE_CODE (type))
6793 case RECORD_TYPE:
6794 /* And now merge the fields of structure. */
6795 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6797 if (TREE_CODE (field) == FIELD_DECL)
6799 int num;
6801 if (TREE_TYPE (field) == error_mark_node)
6802 continue;
6804 /* Bitfields are always classified as integer. Handle them
6805 early, since later code would consider them to be
6806 misaligned integers. */
6807 if (DECL_BIT_FIELD (field))
6809 for (i = (int_bit_position (field)
6810 + (bit_offset % 64)) / 8 / 8;
6811 i < ((int_bit_position (field) + (bit_offset % 64))
6812 + tree_to_shwi (DECL_SIZE (field))
6813 + 63) / 8 / 8; i++)
6814 classes[i] =
6815 merge_classes (X86_64_INTEGER_CLASS,
6816 classes[i]);
6818 else
6820 int pos;
6822 type = TREE_TYPE (field);
6824 /* Flexible array member is ignored. */
6825 if (TYPE_MODE (type) == BLKmode
6826 && TREE_CODE (type) == ARRAY_TYPE
6827 && TYPE_SIZE (type) == NULL_TREE
6828 && TYPE_DOMAIN (type) != NULL_TREE
6829 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6830 == NULL_TREE))
6832 static bool warned;
6834 if (!warned && warn_psabi)
6836 warned = true;
6837 inform (input_location,
6838 "the ABI of passing struct with"
6839 " a flexible array member has"
6840 " changed in GCC 4.4");
6842 continue;
6844 num = classify_argument (TYPE_MODE (type), type,
6845 subclasses,
6846 (int_bit_position (field)
6847 + bit_offset) % 512);
6848 if (!num)
6849 return 0;
6850 pos = (int_bit_position (field)
6851 + (bit_offset % 64)) / 8 / 8;
6852 for (i = 0; i < num && (i + pos) < words; i++)
6853 classes[i + pos] =
6854 merge_classes (subclasses[i], classes[i + pos]);
6858 break;
6860 case ARRAY_TYPE:
6861 /* Arrays are handled as small records. */
6863 int num;
6864 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6865 TREE_TYPE (type), subclasses, bit_offset);
6866 if (!num)
6867 return 0;
6869 /* The partial classes are now full classes. */
6870 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6871 subclasses[0] = X86_64_SSE_CLASS;
6872 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6873 && !((bit_offset % 64) == 0 && bytes == 4))
6874 subclasses[0] = X86_64_INTEGER_CLASS;
6876 for (i = 0; i < words; i++)
6877 classes[i] = subclasses[i % num];
6879 break;
6881 case UNION_TYPE:
6882 case QUAL_UNION_TYPE:
6883 /* Unions are similar to RECORD_TYPE but offset is always 0.
6885 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6887 if (TREE_CODE (field) == FIELD_DECL)
6889 int num;
6891 if (TREE_TYPE (field) == error_mark_node)
6892 continue;
6894 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6895 TREE_TYPE (field), subclasses,
6896 bit_offset);
6897 if (!num)
6898 return 0;
6899 for (i = 0; i < num && i < words; i++)
6900 classes[i] = merge_classes (subclasses[i], classes[i]);
6903 break;
6905 default:
6906 gcc_unreachable ();
6909 if (words > 2)
6911 /* When the size is > 16 bytes, if the first class isn't
6912 X86_64_SSE_CLASS or any of the others aren't
6913 X86_64_SSEUP_CLASS, everything should be passed in
6914 memory. */
6915 if (classes[0] != X86_64_SSE_CLASS)
6916 return 0;
6918 for (i = 1; i < words; i++)
6919 if (classes[i] != X86_64_SSEUP_CLASS)
6920 return 0;
6923 /* Final merger cleanup. */
6924 for (i = 0; i < words; i++)
6926 /* If one class is MEMORY, everything should be passed in
6927 memory. */
6928 if (classes[i] == X86_64_MEMORY_CLASS)
6929 return 0;
6931 /* The X86_64_SSEUP_CLASS should be always preceded by
6932 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6933 if (classes[i] == X86_64_SSEUP_CLASS
6934 && classes[i - 1] != X86_64_SSE_CLASS
6935 && classes[i - 1] != X86_64_SSEUP_CLASS)
6937 /* The first one should never be X86_64_SSEUP_CLASS. */
6938 gcc_assert (i != 0);
6939 classes[i] = X86_64_SSE_CLASS;
6942 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6943 everything should be passed in memory. */
6944 if (classes[i] == X86_64_X87UP_CLASS
6945 && (classes[i - 1] != X86_64_X87_CLASS))
6947 static bool warned;
6949 /* The first one should never be X86_64_X87UP_CLASS. */
6950 gcc_assert (i != 0);
6951 if (!warned && warn_psabi)
6953 warned = true;
6954 inform (input_location,
6955 "the ABI of passing union with long double"
6956 " has changed in GCC 4.4");
6958 return 0;
6961 return words;
6964 /* Compute the alignment needed. We align all types to natural boundaries with
6965 the exception of XFmode, which is aligned to 64 bits. */
6966 if (mode != VOIDmode && mode != BLKmode)
6968 int mode_alignment = GET_MODE_BITSIZE (mode);
6970 if (mode == XFmode)
6971 mode_alignment = 128;
6972 else if (mode == XCmode)
6973 mode_alignment = 256;
6974 if (COMPLEX_MODE_P (mode))
6975 mode_alignment /= 2;
6976 /* Misaligned fields are always returned in memory. */
6977 if (bit_offset % mode_alignment)
6978 return 0;
6981 /* For V1xx modes, just use the base mode. */
6982 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6983 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6984 mode = GET_MODE_INNER (mode);
6986 /* Classification of atomic types. */
6987 switch (mode)
6989 case SDmode:
6990 case DDmode:
6991 classes[0] = X86_64_SSE_CLASS;
6992 return 1;
6993 case TDmode:
6994 classes[0] = X86_64_SSE_CLASS;
6995 classes[1] = X86_64_SSEUP_CLASS;
6996 return 2;
6997 case DImode:
6998 case SImode:
6999 case HImode:
7000 case QImode:
7001 case CSImode:
7002 case CHImode:
7003 case CQImode:
7005 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
7007 /* Analyze last 128 bits only. */
7008 size = (size - 1) & 0x7f;
7010 if (size < 32)
7012 classes[0] = X86_64_INTEGERSI_CLASS;
7013 return 1;
7015 else if (size < 64)
7017 classes[0] = X86_64_INTEGER_CLASS;
7018 return 1;
7020 else if (size < 64+32)
7022 classes[0] = X86_64_INTEGER_CLASS;
7023 classes[1] = X86_64_INTEGERSI_CLASS;
7024 return 2;
7026 else if (size < 64+64)
7028 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7029 return 2;
7031 else
7032 gcc_unreachable ();
7034 case CDImode:
7035 case TImode:
7036 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7037 return 2;
7038 case COImode:
7039 case OImode:
7040 /* OImode shouldn't be used directly. */
7041 gcc_unreachable ();
7042 case CTImode:
7043 return 0;
7044 case SFmode:
7045 if (!(bit_offset % 64))
7046 classes[0] = X86_64_SSESF_CLASS;
7047 else
7048 classes[0] = X86_64_SSE_CLASS;
7049 return 1;
7050 case DFmode:
7051 classes[0] = X86_64_SSEDF_CLASS;
7052 return 1;
7053 case XFmode:
7054 classes[0] = X86_64_X87_CLASS;
7055 classes[1] = X86_64_X87UP_CLASS;
7056 return 2;
7057 case TFmode:
7058 classes[0] = X86_64_SSE_CLASS;
7059 classes[1] = X86_64_SSEUP_CLASS;
7060 return 2;
7061 case SCmode:
7062 classes[0] = X86_64_SSE_CLASS;
7063 if (!(bit_offset % 64))
7064 return 1;
7065 else
7067 static bool warned;
7069 if (!warned && warn_psabi)
7071 warned = true;
7072 inform (input_location,
7073 "the ABI of passing structure with complex float"
7074 " member has changed in GCC 4.4");
7076 classes[1] = X86_64_SSESF_CLASS;
7077 return 2;
7079 case DCmode:
7080 classes[0] = X86_64_SSEDF_CLASS;
7081 classes[1] = X86_64_SSEDF_CLASS;
7082 return 2;
7083 case XCmode:
7084 classes[0] = X86_64_COMPLEX_X87_CLASS;
7085 return 1;
7086 case TCmode:
7087 /* This mode is larger than 16 bytes. */
7088 return 0;
7089 case V8SFmode:
7090 case V8SImode:
7091 case V32QImode:
7092 case V16HImode:
7093 case V4DFmode:
7094 case V4DImode:
7095 classes[0] = X86_64_SSE_CLASS;
7096 classes[1] = X86_64_SSEUP_CLASS;
7097 classes[2] = X86_64_SSEUP_CLASS;
7098 classes[3] = X86_64_SSEUP_CLASS;
7099 return 4;
7100 case V8DFmode:
7101 case V16SFmode:
7102 case V8DImode:
7103 case V16SImode:
7104 case V32HImode:
7105 case V64QImode:
7106 classes[0] = X86_64_SSE_CLASS;
7107 classes[1] = X86_64_SSEUP_CLASS;
7108 classes[2] = X86_64_SSEUP_CLASS;
7109 classes[3] = X86_64_SSEUP_CLASS;
7110 classes[4] = X86_64_SSEUP_CLASS;
7111 classes[5] = X86_64_SSEUP_CLASS;
7112 classes[6] = X86_64_SSEUP_CLASS;
7113 classes[7] = X86_64_SSEUP_CLASS;
7114 return 8;
7115 case V4SFmode:
7116 case V4SImode:
7117 case V16QImode:
7118 case V8HImode:
7119 case V2DFmode:
7120 case V2DImode:
7121 classes[0] = X86_64_SSE_CLASS;
7122 classes[1] = X86_64_SSEUP_CLASS;
7123 return 2;
7124 case V1TImode:
7125 case V1DImode:
7126 case V2SFmode:
7127 case V2SImode:
7128 case V4HImode:
7129 case V8QImode:
7130 classes[0] = X86_64_SSE_CLASS;
7131 return 1;
7132 case BLKmode:
7133 case VOIDmode:
7134 return 0;
7135 default:
7136 gcc_assert (VECTOR_MODE_P (mode));
7138 if (bytes > 16)
7139 return 0;
7141 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7143 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7144 classes[0] = X86_64_INTEGERSI_CLASS;
7145 else
7146 classes[0] = X86_64_INTEGER_CLASS;
7147 classes[1] = X86_64_INTEGER_CLASS;
7148 return 1 + (bytes > 8);
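/* Illustrative sketch added for exposition, not part of GCC: classification
   of a hypothetical 16-byte aggregate under the rules above.

       struct s { int a; float b; double c; };

   Eightbyte 0 contains A (INTEGERSI) and B (SSE, since it is not 64-bit
   aligned); merging them gives X86_64_INTEGER_CLASS.  Eightbyte 1 contains
   C and is classified X86_64_SSEDF_CLASS.  classify_argument therefore
   returns 2 with classes[] = { INTEGER, SSEDF }, i.e. the struct travels
   in one general-purpose and one SSE register.  */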
7152 /* Examine the argument and set the number of registers required in each
7153 class. Return true iff the parameter should be passed in memory. */
7155 static bool
7156 examine_argument (machine_mode mode, const_tree type, int in_return,
7157 int *int_nregs, int *sse_nregs)
7159 enum x86_64_reg_class regclass[MAX_CLASSES];
7160 int n = classify_argument (mode, type, regclass, 0);
7162 *int_nregs = 0;
7163 *sse_nregs = 0;
7165 if (!n)
7166 return true;
7167 for (n--; n >= 0; n--)
7168 switch (regclass[n])
7170 case X86_64_INTEGER_CLASS:
7171 case X86_64_INTEGERSI_CLASS:
7172 (*int_nregs)++;
7173 break;
7174 case X86_64_SSE_CLASS:
7175 case X86_64_SSESF_CLASS:
7176 case X86_64_SSEDF_CLASS:
7177 (*sse_nregs)++;
7178 break;
7179 case X86_64_NO_CLASS:
7180 case X86_64_SSEUP_CLASS:
7181 break;
7182 case X86_64_X87_CLASS:
7183 case X86_64_X87UP_CLASS:
7184 case X86_64_COMPLEX_X87_CLASS:
7185 if (!in_return)
7186 return true;
7187 break;
7188 case X86_64_MEMORY_CLASS:
7189 gcc_unreachable ();
7192 return false;
7195 /* Construct container for the argument used by GCC interface. See
7196 FUNCTION_ARG for the detailed description. */
7198 static rtx
7199 construct_container (machine_mode mode, machine_mode orig_mode,
7200 const_tree type, int in_return, int nintregs, int nsseregs,
7201 const int *intreg, int sse_regno)
7203 /* The following variables hold the static issued_error state. */
7204 static bool issued_sse_arg_error;
7205 static bool issued_sse_ret_error;
7206 static bool issued_x87_ret_error;
7208 machine_mode tmpmode;
7209 int bytes =
7210 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7211 enum x86_64_reg_class regclass[MAX_CLASSES];
7212 int n;
7213 int i;
7214 int nexps = 0;
7215 int needed_sseregs, needed_intregs;
7216 rtx exp[MAX_CLASSES];
7217 rtx ret;
7219 n = classify_argument (mode, type, regclass, 0);
7220 if (!n)
7221 return NULL;
7222 if (examine_argument (mode, type, in_return, &needed_intregs,
7223 &needed_sseregs))
7224 return NULL;
7225 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7226 return NULL;
7228 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7229 some less clueful developer tries to use floating-point anyway. */
7230 if (needed_sseregs && !TARGET_SSE)
7232 if (in_return)
7234 if (!issued_sse_ret_error)
7236 error ("SSE register return with SSE disabled");
7237 issued_sse_ret_error = true;
7240 else if (!issued_sse_arg_error)
7242 error ("SSE register argument with SSE disabled");
7243 issued_sse_arg_error = true;
7245 return NULL;
7248 /* Likewise, error if the ABI requires us to return values in the
7249 x87 registers and the user specified -mno-80387. */
7250 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7251 for (i = 0; i < n; i++)
7252 if (regclass[i] == X86_64_X87_CLASS
7253 || regclass[i] == X86_64_X87UP_CLASS
7254 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7256 if (!issued_x87_ret_error)
7258 error ("x87 register return with x87 disabled");
7259 issued_x87_ret_error = true;
7261 return NULL;
7264 /* First construct simple cases. Avoid SCmode, since we want to use
7265 a single register to pass this type. */
7266 if (n == 1 && mode != SCmode)
7267 switch (regclass[0])
7269 case X86_64_INTEGER_CLASS:
7270 case X86_64_INTEGERSI_CLASS:
7271 return gen_rtx_REG (mode, intreg[0]);
7272 case X86_64_SSE_CLASS:
7273 case X86_64_SSESF_CLASS:
7274 case X86_64_SSEDF_CLASS:
7275 if (mode != BLKmode)
7276 return gen_reg_or_parallel (mode, orig_mode,
7277 SSE_REGNO (sse_regno));
7278 break;
7279 case X86_64_X87_CLASS:
7280 case X86_64_COMPLEX_X87_CLASS:
7281 return gen_rtx_REG (mode, FIRST_STACK_REG);
7282 case X86_64_NO_CLASS:
7283 /* Zero sized array, struct or class. */
7284 return NULL;
7285 default:
7286 gcc_unreachable ();
7288 if (n == 2
7289 && regclass[0] == X86_64_SSE_CLASS
7290 && regclass[1] == X86_64_SSEUP_CLASS
7291 && mode != BLKmode)
7292 return gen_reg_or_parallel (mode, orig_mode,
7293 SSE_REGNO (sse_regno));
7294 if (n == 4
7295 && regclass[0] == X86_64_SSE_CLASS
7296 && regclass[1] == X86_64_SSEUP_CLASS
7297 && regclass[2] == X86_64_SSEUP_CLASS
7298 && regclass[3] == X86_64_SSEUP_CLASS
7299 && mode != BLKmode)
7300 return gen_reg_or_parallel (mode, orig_mode,
7301 SSE_REGNO (sse_regno));
7302 if (n == 8
7303 && regclass[0] == X86_64_SSE_CLASS
7304 && regclass[1] == X86_64_SSEUP_CLASS
7305 && regclass[2] == X86_64_SSEUP_CLASS
7306 && regclass[3] == X86_64_SSEUP_CLASS
7307 && regclass[4] == X86_64_SSEUP_CLASS
7308 && regclass[5] == X86_64_SSEUP_CLASS
7309 && regclass[6] == X86_64_SSEUP_CLASS
7310 && regclass[7] == X86_64_SSEUP_CLASS
7311 && mode != BLKmode)
7312 return gen_reg_or_parallel (mode, orig_mode,
7313 SSE_REGNO (sse_regno));
7314 if (n == 2
7315 && regclass[0] == X86_64_X87_CLASS
7316 && regclass[1] == X86_64_X87UP_CLASS)
7317 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7319 if (n == 2
7320 && regclass[0] == X86_64_INTEGER_CLASS
7321 && regclass[1] == X86_64_INTEGER_CLASS
7322 && (mode == CDImode || mode == TImode)
7323 && intreg[0] + 1 == intreg[1])
7324 return gen_rtx_REG (mode, intreg[0]);
7326 /* Otherwise figure out the entries of the PARALLEL. */
7327 for (i = 0; i < n; i++)
7329 int pos;
7331 switch (regclass[i])
7333 case X86_64_NO_CLASS:
7334 break;
7335 case X86_64_INTEGER_CLASS:
7336 case X86_64_INTEGERSI_CLASS:
7337 /* Merge TImodes on aligned occasions here too. */
7338 if (i * 8 + 8 > bytes)
7339 tmpmode
7340 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7341 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7342 tmpmode = SImode;
7343 else
7344 tmpmode = DImode;
7345 /* We've requested 24 bytes for which we
7346 don't have a mode. Use DImode. */
7347 if (tmpmode == BLKmode)
7348 tmpmode = DImode;
7349 exp [nexps++]
7350 = gen_rtx_EXPR_LIST (VOIDmode,
7351 gen_rtx_REG (tmpmode, *intreg),
7352 GEN_INT (i*8));
7353 intreg++;
7354 break;
7355 case X86_64_SSESF_CLASS:
7356 exp [nexps++]
7357 = gen_rtx_EXPR_LIST (VOIDmode,
7358 gen_rtx_REG (SFmode,
7359 SSE_REGNO (sse_regno)),
7360 GEN_INT (i*8));
7361 sse_regno++;
7362 break;
7363 case X86_64_SSEDF_CLASS:
7364 exp [nexps++]
7365 = gen_rtx_EXPR_LIST (VOIDmode,
7366 gen_rtx_REG (DFmode,
7367 SSE_REGNO (sse_regno)),
7368 GEN_INT (i*8));
7369 sse_regno++;
7370 break;
7371 case X86_64_SSE_CLASS:
7372 pos = i;
7373 switch (n)
7375 case 1:
7376 tmpmode = DImode;
7377 break;
7378 case 2:
7379 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7381 tmpmode = TImode;
7382 i++;
7384 else
7385 tmpmode = DImode;
7386 break;
7387 case 4:
7388 gcc_assert (i == 0
7389 && regclass[1] == X86_64_SSEUP_CLASS
7390 && regclass[2] == X86_64_SSEUP_CLASS
7391 && regclass[3] == X86_64_SSEUP_CLASS);
7392 tmpmode = OImode;
7393 i += 3;
7394 break;
7395 case 8:
7396 gcc_assert (i == 0
7397 && regclass[1] == X86_64_SSEUP_CLASS
7398 && regclass[2] == X86_64_SSEUP_CLASS
7399 && regclass[3] == X86_64_SSEUP_CLASS
7400 && regclass[4] == X86_64_SSEUP_CLASS
7401 && regclass[5] == X86_64_SSEUP_CLASS
7402 && regclass[6] == X86_64_SSEUP_CLASS
7403 && regclass[7] == X86_64_SSEUP_CLASS);
7404 tmpmode = XImode;
7405 i += 7;
7406 break;
7407 default:
7408 gcc_unreachable ();
7410 exp [nexps++]
7411 = gen_rtx_EXPR_LIST (VOIDmode,
7412 gen_rtx_REG (tmpmode,
7413 SSE_REGNO (sse_regno)),
7414 GEN_INT (pos*8));
7415 sse_regno++;
7416 break;
7417 default:
7418 gcc_unreachable ();
7422 /* Empty aligned struct, union or class. */
7423 if (nexps == 0)
7424 return NULL;
7426 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7427 for (i = 0; i < nexps; i++)
7428 XVECEXP (ret, 0, i) = exp [i];
7429 return ret;
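/* Illustrative sketch added for exposition, not part of GCC: for the
   struct used in the classify_argument example above (classes INTEGER and
   SSEDF) and the first free argument registers, the PARALLEL built here
   looks roughly like

       (parallel:BLK [(expr_list (reg:DI di) (const_int 0))
                      (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. bytes 0-7 are passed in %rdi and bytes 8-15 in %xmm0.  */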
7432 /* Update the data in CUM to advance over an argument of mode MODE
7433 and data type TYPE. (TYPE is null for libcalls where that information
7434 may not be available.)
7436 Return the number of integer registers advanced over. */
7438 static int
7439 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7440 const_tree type, HOST_WIDE_INT bytes,
7441 HOST_WIDE_INT words)
7443 int res = 0;
7445 switch (mode)
7447 default:
7448 break;
7450 case BLKmode:
7451 if (bytes < 0)
7452 break;
7453 /* FALLTHRU */
7455 case DImode:
7456 case SImode:
7457 case HImode:
7458 case QImode:
7459 cum->words += words;
7460 cum->nregs -= words;
7461 cum->regno += words;
7462 if (cum->nregs >= 0)
7463 res = words;
7464 if (cum->nregs <= 0)
7466 cum->nregs = 0;
7467 cum->regno = 0;
7469 break;
7471 case OImode:
7472 /* OImode shouldn't be used directly. */
7473 gcc_unreachable ();
7475 case DFmode:
7476 if (cum->float_in_sse < 2)
7477 break;
7478 case SFmode:
7479 if (cum->float_in_sse < 1)
7480 break;
7481 /* FALLTHRU */
7483 case V8SFmode:
7484 case V8SImode:
7485 case V64QImode:
7486 case V32HImode:
7487 case V16SImode:
7488 case V8DImode:
7489 case V16SFmode:
7490 case V8DFmode:
7491 case V32QImode:
7492 case V16HImode:
7493 case V4DFmode:
7494 case V4DImode:
7495 case TImode:
7496 case V16QImode:
7497 case V8HImode:
7498 case V4SImode:
7499 case V2DImode:
7500 case V4SFmode:
7501 case V2DFmode:
7502 if (!type || !AGGREGATE_TYPE_P (type))
7504 cum->sse_words += words;
7505 cum->sse_nregs -= 1;
7506 cum->sse_regno += 1;
7507 if (cum->sse_nregs <= 0)
7509 cum->sse_nregs = 0;
7510 cum->sse_regno = 0;
7513 break;
7515 case V8QImode:
7516 case V4HImode:
7517 case V2SImode:
7518 case V2SFmode:
7519 case V1TImode:
7520 case V1DImode:
7521 if (!type || !AGGREGATE_TYPE_P (type))
7523 cum->mmx_words += words;
7524 cum->mmx_nregs -= 1;
7525 cum->mmx_regno += 1;
7526 if (cum->mmx_nregs <= 0)
7528 cum->mmx_nregs = 0;
7529 cum->mmx_regno = 0;
7532 break;
7535 return res;
7538 static int
7539 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7540 const_tree type, HOST_WIDE_INT words, bool named)
7542 int int_nregs, sse_nregs;
7544 /* Unnamed 512 and 256 bit vector mode parameters are passed on the stack. */
7545 if (!named && (VALID_AVX512F_REG_MODE (mode)
7546 || VALID_AVX256_REG_MODE (mode)))
7547 return 0;
7549 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7550 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7552 cum->nregs -= int_nregs;
7553 cum->sse_nregs -= sse_nregs;
7554 cum->regno += int_nregs;
7555 cum->sse_regno += sse_nregs;
7556 return int_nregs;
7558 else
7560 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7561 cum->words = (cum->words + align - 1) & ~(align - 1);
7562 cum->words += words;
7563 return 0;
7567 static int
7568 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7569 HOST_WIDE_INT words)
7571 /* Otherwise, this should be passed indirectly. */
7572 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7574 cum->words += words;
7575 if (cum->nregs > 0)
7577 cum->nregs -= 1;
7578 cum->regno += 1;
7579 return 1;
7581 return 0;
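/* Illustrative sketch added for exposition, not part of GCC: under the
   MS_ABI rules above every parameter, integer or floating-point, consumes
   one of the four register slots.  For a hypothetical

       void f (int a, double b, int c, double d, int e);

   A goes in %ecx, B in %xmm1, C in %r8d, D in %xmm3 and E is passed on the
   stack, even though only two integer and two SSE registers were used.  */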
7584 /* Update the data in CUM to advance over an argument of mode MODE and
7585 data type TYPE. (TYPE is null for libcalls where that information
7586 may not be available.) */
7588 static void
7589 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7590 const_tree type, bool named)
7592 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7593 HOST_WIDE_INT bytes, words;
7594 int nregs;
7596 if (mode == BLKmode)
7597 bytes = int_size_in_bytes (type);
7598 else
7599 bytes = GET_MODE_SIZE (mode);
7600 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7602 if (type)
7603 mode = type_natural_mode (type, NULL, false);
7605 if ((type && POINTER_BOUNDS_TYPE_P (type))
7606 || POINTER_BOUNDS_MODE_P (mode))
7608 /* If we pass bounds in the Bounds Table then just update the remaining bounds count. */
7609 if (cum->bnds_in_bt)
7611 cum->bnds_in_bt--;
7612 return;
7615 /* Update the remaining number of bounds to force. */
7616 if (cum->force_bnd_pass)
7617 cum->force_bnd_pass--;
7619 cum->bnd_regno++;
7621 return;
7624 /* The first arg not going to Bounds Tables resets this counter. */
7625 cum->bnds_in_bt = 0;
7626 /* For unnamed args we always pass bounds to avoid a bounds mismatch when
7627 the passed and received types do not match. If bounds do not follow an
7628 unnamed arg, still pretend the required number of bounds were passed. */
7629 if (cum->force_bnd_pass)
7631 cum->bnd_regno += cum->force_bnd_pass;
7632 cum->force_bnd_pass = 0;
7635 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7636 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7637 else if (TARGET_64BIT)
7638 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7639 else
7640 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7642 /* For stdarg we expect bounds to be passed for each value passed
7643 in register. */
7644 if (cum->stdarg)
7645 cum->force_bnd_pass = nregs;
7646 /* For pointers passed in memory we expect bounds passed in Bounds
7647 Table. */
7648 if (!nregs)
7649 cum->bnds_in_bt = chkp_type_bounds_count (type);
7652 /* Define where to put the arguments to a function.
7653 Value is zero to push the argument on the stack,
7654 or a hard register in which to store the argument.
7656 MODE is the argument's machine mode.
7657 TYPE is the data type of the argument (as a tree).
7658 This is null for libcalls where that information may
7659 not be available.
7660 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7661 the preceding args and about the function being called.
7662 NAMED is nonzero if this argument is a named parameter
7663 (otherwise it is an extra parameter matching an ellipsis). */
7665 static rtx
7666 function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7667 machine_mode orig_mode, const_tree type,
7668 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7670 /* Avoid the AL settings for the Unix64 ABI. */
7671 if (mode == VOIDmode)
7672 return constm1_rtx;
7674 switch (mode)
7676 default:
7677 break;
7679 case BLKmode:
7680 if (bytes < 0)
7681 break;
7682 /* FALLTHRU */
7683 case DImode:
7684 case SImode:
7685 case HImode:
7686 case QImode:
7687 if (words <= cum->nregs)
7689 int regno = cum->regno;
7691 /* Fastcall allocates the first two DWORD (SImode) or
7692 smaller arguments to ECX and EDX if it isn't an
7693 aggregate type. */
7694 if (cum->fastcall)
7696 if (mode == BLKmode
7697 || mode == DImode
7698 || (type && AGGREGATE_TYPE_P (type)))
7699 break;
7701 /* ECX, not EAX, is the first allocated register. */
7702 if (regno == AX_REG)
7703 regno = CX_REG;
7705 return gen_rtx_REG (mode, regno);
7707 break;
7709 case DFmode:
7710 if (cum->float_in_sse < 2)
7711 break;
7712 case SFmode:
7713 if (cum->float_in_sse < 1)
7714 break;
7715 /* FALLTHRU */
7716 case TImode:
7717 /* In 32bit, we pass TImode in xmm registers. */
7718 case V16QImode:
7719 case V8HImode:
7720 case V4SImode:
7721 case V2DImode:
7722 case V4SFmode:
7723 case V2DFmode:
7724 if (!type || !AGGREGATE_TYPE_P (type))
7726 if (cum->sse_nregs)
7727 return gen_reg_or_parallel (mode, orig_mode,
7728 cum->sse_regno + FIRST_SSE_REG);
7730 break;
7732 case OImode:
7733 case XImode:
7734 /* OImode and XImode shouldn't be used directly. */
7735 gcc_unreachable ();
7737 case V64QImode:
7738 case V32HImode:
7739 case V16SImode:
7740 case V8DImode:
7741 case V16SFmode:
7742 case V8DFmode:
7743 case V8SFmode:
7744 case V8SImode:
7745 case V32QImode:
7746 case V16HImode:
7747 case V4DFmode:
7748 case V4DImode:
7749 if (!type || !AGGREGATE_TYPE_P (type))
7751 if (cum->sse_nregs)
7752 return gen_reg_or_parallel (mode, orig_mode,
7753 cum->sse_regno + FIRST_SSE_REG);
7755 break;
7757 case V8QImode:
7758 case V4HImode:
7759 case V2SImode:
7760 case V2SFmode:
7761 case V1TImode:
7762 case V1DImode:
7763 if (!type || !AGGREGATE_TYPE_P (type))
7765 if (cum->mmx_nregs)
7766 return gen_reg_or_parallel (mode, orig_mode,
7767 cum->mmx_regno + FIRST_MMX_REG);
7769 break;
7772 return NULL_RTX;
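/* Illustrative sketch added for exposition, not part of GCC: 32-bit
   register passing as implemented above, for hypothetical declarations:

       __attribute__ ((regparm (3))) int f (int a, int b, int c);
           a -> %eax, b -> %edx, c -> %ecx

       __attribute__ ((fastcall)) int g (int a, int b, int c);
           a -> %ecx, b -> %edx, c -> stack

   and for thiscall only the implicit this pointer goes in %ecx.  */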
7775 static rtx
7776 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7777 machine_mode orig_mode, const_tree type, bool named)
7779 /* Handle a hidden AL argument containing number of registers
7780 for varargs x86-64 functions. */
7781 if (mode == VOIDmode)
7782 return GEN_INT (cum->maybe_vaarg
7783 ? (cum->sse_nregs < 0
7784 ? X86_64_SSE_REGPARM_MAX
7785 : cum->sse_regno)
7786 : -1);
7788 switch (mode)
7790 default:
7791 break;
7793 case V8SFmode:
7794 case V8SImode:
7795 case V32QImode:
7796 case V16HImode:
7797 case V4DFmode:
7798 case V4DImode:
7799 case V16SFmode:
7800 case V16SImode:
7801 case V64QImode:
7802 case V32HImode:
7803 case V8DFmode:
7804 case V8DImode:
7805 /* Unnamed 256 and 512 bit vector mode parameters are passed on the stack. */
7806 if (!named)
7807 return NULL;
7808 break;
7811 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7812 cum->sse_nregs,
7813 &x86_64_int_parameter_registers [cum->regno],
7814 cum->sse_regno);
7817 static rtx
7818 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7819 machine_mode orig_mode, bool named,
7820 HOST_WIDE_INT bytes)
7822 unsigned int regno;
7824 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7825 We use the value of -2 to specify that the current function call is MS ABI. */
7826 if (mode == VOIDmode)
7827 return GEN_INT (-2);
7829 /* If we've run out of registers, it goes on the stack. */
7830 if (cum->nregs == 0)
7831 return NULL_RTX;
7833 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7835 /* Only floating point modes are passed in anything but integer regs. */
7836 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7838 if (named)
7839 regno = cum->regno + FIRST_SSE_REG;
7840 else
7842 rtx t1, t2;
7844 /* Unnamed floating parameters are passed in both the
7845 SSE and integer registers. */
7846 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7847 t2 = gen_rtx_REG (mode, regno);
7848 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7849 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7850 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7853 /* Handle aggregated types passed in register. */
7854 if (orig_mode == BLKmode)
7856 if (bytes > 0 && bytes <= 8)
7857 mode = (bytes > 4 ? DImode : SImode);
7858 if (mode == BLKmode)
7859 mode = DImode;
7862 return gen_reg_or_parallel (mode, orig_mode, regno);
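/* Illustrative sketch added for exposition, not part of GCC: the
   unnamed-float PARALLEL above implements the Windows x64 rule that
   varargs floating arguments live in both register files.  For

       printf ("%f", 1.5);

   the double occupies the second argument slot, so it is passed in %xmm1
   and duplicated in %rdx, letting the callee spill it from either set.  */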
7865 /* Return where to put the arguments to a function.
7866 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7868 MODE is the argument's machine mode. TYPE is the data type of the
7869 argument. It is null for libcalls where that information may not be
7870 available. CUM gives information about the preceding args and about
7871 the function being called. NAMED is nonzero if this argument is a
7872 named parameter (otherwise it is an extra parameter matching an
7873 ellipsis). */
7875 static rtx
7876 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7877 const_tree type, bool named)
7879 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7880 machine_mode mode = omode;
7881 HOST_WIDE_INT bytes, words;
7882 rtx arg;
7884 /* All pointer bounds arguments are handled separately here. */
7885 if ((type && POINTER_BOUNDS_TYPE_P (type))
7886 || POINTER_BOUNDS_MODE_P (mode))
7888 /* Return NULL if bounds are forced to go in Bounds Table. */
7889 if (cum->bnds_in_bt)
7890 arg = NULL;
7891 /* Return the next available bound reg if any. */
7892 else if (cum->bnd_regno <= LAST_BND_REG)
7893 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7894 /* Return the next special slot number otherwise. */
7895 else
7896 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7898 return arg;
7901 if (mode == BLKmode)
7902 bytes = int_size_in_bytes (type);
7903 else
7904 bytes = GET_MODE_SIZE (mode);
7905 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7907 /* To simplify the code below, represent vector types with a vector mode
7908 even if MMX/SSE are not active. */
7909 if (type && TREE_CODE (type) == VECTOR_TYPE)
7910 mode = type_natural_mode (type, cum, false);
7912 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7913 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7914 else if (TARGET_64BIT)
7915 arg = function_arg_64 (cum, mode, omode, type, named);
7916 else
7917 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7919 return arg;
7922 /* A C expression that indicates when an argument must be passed by
7923 reference. If nonzero for an argument, a copy of that argument is
7924 made in memory and a pointer to the argument is passed instead of
7925 the argument itself. The pointer is passed in whatever way is
7926 appropriate for passing a pointer to that type. */
7928 static bool
7929 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7930 const_tree type, bool)
7932 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7934 /* Bounds are never passed by reference. */
7935 if ((type && POINTER_BOUNDS_TYPE_P (type))
7936 || POINTER_BOUNDS_MODE_P (mode))
7937 return false;
7939 /* See Windows x64 Software Convention. */
7940 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7942 int msize = (int) GET_MODE_SIZE (mode);
7943 if (type)
7945 /* Arrays are passed by reference. */
7946 if (TREE_CODE (type) == ARRAY_TYPE)
7947 return true;
7949 if (AGGREGATE_TYPE_P (type))
7951 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7952 are passed by reference. */
7953 msize = int_size_in_bytes (type);
7957 /* __m128 is passed by reference. */
7958 switch (msize) {
7959 case 1: case 2: case 4: case 8:
7960 break;
7961 default:
7962 return true;
7965 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7966 return true;
7968 return false;
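/* Illustrative sketch added for exposition, not part of GCC: consequences
   of the checks above for some hypothetical types.

     MS_ABI (Windows x64):
       struct s3 { char c[3]; }   size 3  -> passed by reference
       struct s8 { char c[8]; }   size 8  -> passed by value in one slot
       __m128                     size 16 -> passed by reference

     64-bit SYSV_ABI: only variable-sized types, for which
     int_size_in_bytes returns -1, are forced to be passed by reference.  */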
7971 /* Return true when TYPE should be 128bit aligned for 32bit argument
7972 passing ABI. XXX: This function is obsolete and is only used for
7973 checking psABI compatibility with previous versions of GCC. */
7975 static bool
7976 ix86_compat_aligned_value_p (const_tree type)
7978 machine_mode mode = TYPE_MODE (type);
7979 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7980 || mode == TDmode
7981 || mode == TFmode
7982 || mode == TCmode)
7983 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7984 return true;
7985 if (TYPE_ALIGN (type) < 128)
7986 return false;
7988 if (AGGREGATE_TYPE_P (type))
7990 /* Walk the aggregates recursively. */
7991 switch (TREE_CODE (type))
7993 case RECORD_TYPE:
7994 case UNION_TYPE:
7995 case QUAL_UNION_TYPE:
7997 tree field;
7999 /* Walk all the structure fields. */
8000 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8002 if (TREE_CODE (field) == FIELD_DECL
8003 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
8004 return true;
8006 break;
8009 case ARRAY_TYPE:
8010 /* Just for use if some languages pass arrays by value. */
8011 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
8012 return true;
8013 break;
8015 default:
8016 gcc_unreachable ();
8019 return false;
8022 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8023 XXX: This function is obsolete and is only used for checking psABI
8024 compatibility with previous versions of GCC. */
8026 static unsigned int
8027 ix86_compat_function_arg_boundary (machine_mode mode,
8028 const_tree type, unsigned int align)
8030 /* In 32bit, only _Decimal128 and __float128 are aligned to their
8031 natural boundaries. */
8032 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8034 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
8035 make an exception for SSE modes since these require 128bit
8036 alignment.
8038 The handling here differs from field_alignment. ICC aligns MMX
8039 arguments to 4 byte boundaries, while structure fields are aligned
8040 to 8 byte boundaries. */
8041 if (!type)
8043 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8044 align = PARM_BOUNDARY;
8046 else
8048 if (!ix86_compat_aligned_value_p (type))
8049 align = PARM_BOUNDARY;
8052 if (align > BIGGEST_ALIGNMENT)
8053 align = BIGGEST_ALIGNMENT;
8054 return align;
8057 /* Return true when TYPE should be 128bit aligned for 32bit argument
8058 passing ABI. */
8060 static bool
8061 ix86_contains_aligned_value_p (const_tree type)
8063 machine_mode mode = TYPE_MODE (type);
8065 if (mode == XFmode || mode == XCmode)
8066 return false;
8068 if (TYPE_ALIGN (type) < 128)
8069 return false;
8071 if (AGGREGATE_TYPE_P (type))
8073 /* Walk the aggregates recursively. */
8074 switch (TREE_CODE (type))
8076 case RECORD_TYPE:
8077 case UNION_TYPE:
8078 case QUAL_UNION_TYPE:
8080 tree field;
8082 /* Walk all the structure fields. */
8083 for (field = TYPE_FIELDS (type);
8084 field;
8085 field = DECL_CHAIN (field))
8087 if (TREE_CODE (field) == FIELD_DECL
8088 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8089 return true;
8091 break;
8094 case ARRAY_TYPE:
8095 /* Just for use if some languages pass arrays by value. */
8096 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8097 return true;
8098 break;
8100 default:
8101 gcc_unreachable ();
8104 else
8105 return TYPE_ALIGN (type) >= 128;
8107 return false;
8110 /* Gives the alignment boundary, in bits, of an argument with the
8111 specified mode and type. */
8113 static unsigned int
8114 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8116 unsigned int align;
8117 if (type)
8119 /* Since the main variant type is used for the call, convert TYPE to
8120 its main variant. */
8121 type = TYPE_MAIN_VARIANT (type);
8122 align = TYPE_ALIGN (type);
8124 else
8125 align = GET_MODE_ALIGNMENT (mode);
8126 if (align < PARM_BOUNDARY)
8127 align = PARM_BOUNDARY;
8128 else
8130 static bool warned;
8131 unsigned int saved_align = align;
8133 if (!TARGET_64BIT)
8135 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8136 if (!type)
8138 if (mode == XFmode || mode == XCmode)
8139 align = PARM_BOUNDARY;
8141 else if (!ix86_contains_aligned_value_p (type))
8142 align = PARM_BOUNDARY;
8144 if (align < 128)
8145 align = PARM_BOUNDARY;
8148 if (warn_psabi
8149 && !warned
8150 && align != ix86_compat_function_arg_boundary (mode, type,
8151 saved_align))
8153 warned = true;
8154 inform (input_location,
8155 "The ABI for passing parameters with %d-byte"
8156 " alignment has changed in GCC 4.6",
8157 align / BITS_PER_UNIT);
8161 return align;
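/* Illustrative sketch added for exposition, not part of GCC: 32-bit
   argument alignment as computed above, for hypothetical prototypes:

       void f (double d);       D is aligned to PARM_BOUNDARY (4 bytes)
       void g (long double x);  X likewise (the XFmode exception)
       void h (__m128 v);       V keeps its natural 16-byte alignment

   Only types containing a value with 128-bit (or larger) alignment keep
   their natural boundary; when the result differs from what
   ix86_compat_function_arg_boundary would have chosen, a -Wpsabi note
   about the GCC 4.6 ABI change is emitted once.  */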
8164 /* Return true if N is a possible register number of function value. */
8166 static bool
8167 ix86_function_value_regno_p (const unsigned int regno)
8169 switch (regno)
8171 case AX_REG:
8172 return true;
8173 case DX_REG:
8174 return (!TARGET_64BIT || ix86_abi != MS_ABI);
8175 case DI_REG:
8176 case SI_REG:
8177 return TARGET_64BIT && ix86_abi != MS_ABI;
8179 case FIRST_BND_REG:
8180 return chkp_function_instrumented_p (current_function_decl);
8182 /* Complex values are returned in %st(0)/%st(1) pair. */
8183 case ST0_REG:
8184 case ST1_REG:
8185 /* TODO: The function should depend on current function ABI but
8186 builtins.c would need updating then. Therefore we use the
8187 default ABI. */
8188 if (TARGET_64BIT && ix86_abi == MS_ABI)
8189 return false;
8190 return TARGET_FLOAT_RETURNS_IN_80387;
8192 /* Complex values are returned in %xmm0/%xmm1 pair. */
8193 case XMM0_REG:
8194 case XMM1_REG:
8195 return TARGET_SSE;
8197 case MM0_REG:
8198 if (TARGET_MACHO || TARGET_64BIT)
8199 return false;
8200 return TARGET_MMX;
8203 return false;
8206 /* Define how to find the value returned by a function.
8207 VALTYPE is the data type of the value (as a tree).
8208 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8209 otherwise, FUNC is 0. */
8211 static rtx
8212 function_value_32 (machine_mode orig_mode, machine_mode mode,
8213 const_tree fntype, const_tree fn)
8215 unsigned int regno;
8217 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8218 we normally prevent this case when mmx is not available. However
8219 some ABIs may require the result to be returned like DImode. */
8220 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8221 regno = FIRST_MMX_REG;
8223 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8224 we prevent this case when sse is not available. However some ABIs
8225 may require the result to be returned like integer TImode. */
8226 else if (mode == TImode
8227 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8228 regno = FIRST_SSE_REG;
8230 /* 32-byte vector modes in %ymm0. */
8231 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8232 regno = FIRST_SSE_REG;
8234 /* 64-byte vector modes in %zmm0. */
8235 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8236 regno = FIRST_SSE_REG;
8238 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8239 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8240 regno = FIRST_FLOAT_REG;
8241 else
8242 /* Most things go in %eax. */
8243 regno = AX_REG;
8245 /* Override FP return register with %xmm0 for local functions when
8246 SSE math is enabled or for functions with sseregparm attribute. */
8247 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8249 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8250 if ((sse_level >= 1 && mode == SFmode)
8251 || (sse_level == 2 && mode == DFmode))
8252 regno = FIRST_SSE_REG;
8255 /* OImode shouldn't be used directly. */
8256 gcc_assert (mode != OImode);
8258 return gen_rtx_REG (orig_mode, regno);
8261 static rtx
8262 function_value_64 (machine_mode orig_mode, machine_mode mode,
8263 const_tree valtype)
8265 rtx ret;
8267 /* Handle libcalls, which don't provide a type node. */
8268 if (valtype == NULL)
8270 unsigned int regno;
8272 switch (mode)
8274 case SFmode:
8275 case SCmode:
8276 case DFmode:
8277 case DCmode:
8278 case TFmode:
8279 case SDmode:
8280 case DDmode:
8281 case TDmode:
8282 regno = FIRST_SSE_REG;
8283 break;
8284 case XFmode:
8285 case XCmode:
8286 regno = FIRST_FLOAT_REG;
8287 break;
8288 case TCmode:
8289 return NULL;
8290 default:
8291 regno = AX_REG;
8294 return gen_rtx_REG (mode, regno);
8296 else if (POINTER_TYPE_P (valtype))
8298 /* Pointers are always returned in word_mode. */
8299 mode = word_mode;
8302 ret = construct_container (mode, orig_mode, valtype, 1,
8303 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8304 x86_64_int_return_registers, 0);
8306 /* For zero sized structures, construct_container returns NULL, but we
8307 need to keep the rest of the compiler happy by returning a meaningful value. */
8308 if (!ret)
8309 ret = gen_rtx_REG (orig_mode, AX_REG);
8311 return ret;
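/* Illustrative sketch added for exposition, not part of GCC: typical SysV
   x86-64 return locations produced by the container logic above.

     long                     -> %rax
     __int128                 -> %rax:%rdx (TImode, two INTEGER words)
     double                   -> %xmm0
     _Complex double          -> %xmm0 (real part), %xmm1 (imaginary part)
     long double              -> %st(0)
     struct { char c[40]; }   -> returned in memory (see
                                 ix86_return_in_memory below)  */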
8314 static rtx
8315 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8316 const_tree valtype)
8318 unsigned int regno = AX_REG;
8320 if (TARGET_SSE)
8322 switch (GET_MODE_SIZE (mode))
8324 case 16:
8325 if (valtype != NULL_TREE
8326 && !VECTOR_INTEGER_TYPE_P (valtype)
8327 && !VECTOR_INTEGER_TYPE_P (valtype)
8328 && !INTEGRAL_TYPE_P (valtype)
8329 && !VECTOR_FLOAT_TYPE_P (valtype))
8330 break;
8331 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8332 && !COMPLEX_MODE_P (mode))
8333 regno = FIRST_SSE_REG;
8334 break;
8335 case 8:
8336 case 4:
8337 if (mode == SFmode || mode == DFmode)
8338 regno = FIRST_SSE_REG;
8339 break;
8340 default:
8341 break;
8344 return gen_rtx_REG (orig_mode, regno);
8347 static rtx
8348 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8349 machine_mode orig_mode, machine_mode mode)
8351 const_tree fn, fntype;
8353 fn = NULL_TREE;
8354 if (fntype_or_decl && DECL_P (fntype_or_decl))
8355 fn = fntype_or_decl;
8356 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8358 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8359 || POINTER_BOUNDS_MODE_P (mode))
8360 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8361 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8362 return function_value_ms_64 (orig_mode, mode, valtype);
8363 else if (TARGET_64BIT)
8364 return function_value_64 (orig_mode, mode, valtype);
8365 else
8366 return function_value_32 (orig_mode, mode, fntype, fn);
8369 static rtx
8370 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8372 machine_mode mode, orig_mode;
8374 orig_mode = TYPE_MODE (valtype);
8375 mode = type_natural_mode (valtype, NULL, true);
8376 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8379 /* Return an RTX representing a place where a function returns
8380 or receives pointer bounds, or NULL if no bounds are returned.
8382 VALTYPE is a data type of a value returned by the function.
8384 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8385 or FUNCTION_TYPE of the function.
8387 If OUTGOING is false, return a place in which the caller will
8388 see the return value. Otherwise, return a place where a
8389 function returns a value. */
8391 static rtx
8392 ix86_function_value_bounds (const_tree valtype,
8393 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8394 bool outgoing ATTRIBUTE_UNUSED)
8396 rtx res = NULL_RTX;
8398 if (BOUNDED_TYPE_P (valtype))
8399 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8400 else if (chkp_type_has_pointer (valtype))
8402 bitmap slots;
8403 rtx bounds[2];
8404 bitmap_iterator bi;
8405 unsigned i, bnd_no = 0;
8407 bitmap_obstack_initialize (NULL);
8408 slots = BITMAP_ALLOC (NULL);
8409 chkp_find_bound_slots (valtype, slots);
8411 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8413 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8414 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8415 gcc_assert (bnd_no < 2);
8416 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8419 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8421 BITMAP_FREE (slots);
8422 bitmap_obstack_release (NULL);
8424 else
8425 res = NULL_RTX;
8427 return res;
8430 /* Pointer function arguments and return values are promoted to
8431 word_mode. */
8433 static machine_mode
8434 ix86_promote_function_mode (const_tree type, machine_mode mode,
8435 int *punsignedp, const_tree fntype,
8436 int for_return)
8438 if (type != NULL_TREE && POINTER_TYPE_P (type))
8440 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8441 return word_mode;
8443 return default_promote_function_mode (type, mode, punsignedp, fntype,
8444 for_return);
8447 /* Return true if a structure, union or array with MODE containing FIELD
8448 should be accessed using BLKmode. */
8450 static bool
8451 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8453 /* Union with XFmode must be in BLKmode. */
8454 return (mode == XFmode
8455 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8456 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8460 ix86_libcall_value (machine_mode mode)
8462 return ix86_function_value_1 (NULL, NULL, mode, mode);
8465 /* Return true iff type is returned in memory. */
8467 static bool
8468 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8470 #ifdef SUBTARGET_RETURN_IN_MEMORY
8471 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8472 #else
8473 const machine_mode mode = type_natural_mode (type, NULL, true);
8474 HOST_WIDE_INT size;
8476 if (POINTER_BOUNDS_TYPE_P (type))
8477 return false;
8479 if (TARGET_64BIT)
8481 if (ix86_function_type_abi (fntype) == MS_ABI)
8483 size = int_size_in_bytes (type);
8485 /* __m128 is returned in xmm0. */
8486 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8487 || INTEGRAL_TYPE_P (type)
8488 || VECTOR_FLOAT_TYPE_P (type))
8489 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8490 && !COMPLEX_MODE_P (mode)
8491 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8492 return false;
8494 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8495 return size != 1 && size != 2 && size != 4 && size != 8;
8497 else
8499 int needed_intregs, needed_sseregs;
8501 return examine_argument (mode, type, 1,
8502 &needed_intregs, &needed_sseregs);
8505 else
8507 if (mode == BLKmode)
8508 return true;
8510 size = int_size_in_bytes (type);
8512 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8513 return false;
8515 if (VECTOR_MODE_P (mode) || mode == TImode)
8517 /* User-created vectors small enough to fit in EAX. */
8518 if (size < 8)
8519 return false;
8521 /* Unless the ABI prescribes otherwise,
8522 MMX/3dNow values are returned in MM0 if available. */
8524 if (size == 8)
8525 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8527 /* SSE values are returned in XMM0 if available. */
8528 if (size == 16)
8529 return !TARGET_SSE;
8531 /* AVX values are returned in YMM0 if available. */
8532 if (size == 32)
8533 return !TARGET_AVX;
8535 /* AVX512F values are returned in ZMM0 if available. */
8536 if (size == 64)
8537 return !TARGET_AVX512F;
8540 if (mode == XFmode)
8541 return false;
8543 if (size > 12)
8544 return true;
8546 /* OImode shouldn't be used directly. */
8547 gcc_assert (mode != OImode);
8549 return false;
8551 #endif
8555 /* Create the va_list data type. */
8557 /* Returns the calling convention specific va_list data type.
8558 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8560 static tree
8561 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8563 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8565 /* For i386 we use plain pointer to argument area. */
8566 if (!TARGET_64BIT || abi == MS_ABI)
8567 return build_pointer_type (char_type_node);
8569 record = lang_hooks.types.make_type (RECORD_TYPE);
8570 type_decl = build_decl (BUILTINS_LOCATION,
8571 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8573 f_gpr = build_decl (BUILTINS_LOCATION,
8574 FIELD_DECL, get_identifier ("gp_offset"),
8575 unsigned_type_node);
8576 f_fpr = build_decl (BUILTINS_LOCATION,
8577 FIELD_DECL, get_identifier ("fp_offset"),
8578 unsigned_type_node);
8579 f_ovf = build_decl (BUILTINS_LOCATION,
8580 FIELD_DECL, get_identifier ("overflow_arg_area"),
8581 ptr_type_node);
8582 f_sav = build_decl (BUILTINS_LOCATION,
8583 FIELD_DECL, get_identifier ("reg_save_area"),
8584 ptr_type_node);
8586 va_list_gpr_counter_field = f_gpr;
8587 va_list_fpr_counter_field = f_fpr;
8589 DECL_FIELD_CONTEXT (f_gpr) = record;
8590 DECL_FIELD_CONTEXT (f_fpr) = record;
8591 DECL_FIELD_CONTEXT (f_ovf) = record;
8592 DECL_FIELD_CONTEXT (f_sav) = record;
8594 TYPE_STUB_DECL (record) = type_decl;
8595 TYPE_NAME (record) = type_decl;
8596 TYPE_FIELDS (record) = f_gpr;
8597 DECL_CHAIN (f_gpr) = f_fpr;
8598 DECL_CHAIN (f_fpr) = f_ovf;
8599 DECL_CHAIN (f_ovf) = f_sav;
8601 layout_type (record);
8603 /* The correct type is an array type of one element. */
8604 return build_array_type (record, build_index_type (size_zero_node));
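/* Reading aid, not compiler code: on the 64-bit SysV side the record built
   above corresponds to the familiar C-level va_list layout, roughly

	typedef struct __va_list_tag
	{
	  unsigned int gp_offset;	// bytes consumed in the GPR save area
	  unsigned int fp_offset;	// bytes consumed incl. the SSE save area
	  void *overflow_arg_area;	// stack-passed arguments
	  void *reg_save_area;		// register save area
	} va_list[1];

   matching the four FIELD_DECLs and the one-element array type returned
   here.  Field names are taken from the code above; the C rendering is only
   an illustration.  */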
8607 /* Set up the builtin va_list data type and, for 64-bit, the additional
8608 calling convention specific va_list data types. */
8610 static tree
8611 ix86_build_builtin_va_list (void)
8613 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8615 /* Initialize abi specific va_list builtin types. */
8616 if (TARGET_64BIT)
8618 tree t;
8619 if (ix86_abi == MS_ABI)
8621 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8622 if (TREE_CODE (t) != RECORD_TYPE)
8623 t = build_variant_type_copy (t);
8624 sysv_va_list_type_node = t;
8626 else
8628 t = ret;
8629 if (TREE_CODE (t) != RECORD_TYPE)
8630 t = build_variant_type_copy (t);
8631 sysv_va_list_type_node = t;
8633 if (ix86_abi != MS_ABI)
8635 t = ix86_build_builtin_va_list_abi (MS_ABI);
8636 if (TREE_CODE (t) != RECORD_TYPE)
8637 t = build_variant_type_copy (t);
8638 ms_va_list_type_node = t;
8640 else
8642 t = ret;
8643 if (TREE_CODE (t) != RECORD_TYPE)
8644 t = build_variant_type_copy (t);
8645 ms_va_list_type_node = t;
8649 return ret;
8652 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8654 static void
8655 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8657 rtx save_area, mem;
8658 alias_set_type set;
8659 int i, max;
8661 /* GPR size of varargs save area. */
8662 if (cfun->va_list_gpr_size)
8663 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8664 else
8665 ix86_varargs_gpr_size = 0;
8667 /* FPR size of varargs save area. We don't need it if we don't pass
8668 anything in SSE registers. */
8669 if (TARGET_SSE && cfun->va_list_fpr_size)
8670 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8671 else
8672 ix86_varargs_fpr_size = 0;
8674 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8675 return;
8677 save_area = frame_pointer_rtx;
8678 set = get_varargs_alias_set ();
8680 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8681 if (max > X86_64_REGPARM_MAX)
8682 max = X86_64_REGPARM_MAX;
8684 for (i = cum->regno; i < max; i++)
8686 mem = gen_rtx_MEM (word_mode,
8687 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8688 MEM_NOTRAP_P (mem) = 1;
8689 set_mem_alias_set (mem, set);
8690 emit_move_insn (mem,
8691 gen_rtx_REG (word_mode,
8692 x86_64_int_parameter_registers[i]));
8695 if (ix86_varargs_fpr_size)
8697 machine_mode smode;
8698 rtx_code_label *label;
8699 rtx test;
8701 /* Now emit code to save SSE registers. The AX parameter contains number
8702 of SSE parameter registers used to call this function, though all we
8703 actually check here is the zero/non-zero status. */
8705 label = gen_label_rtx ();
8706 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8707 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8708 label));
8710 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8711 we used movdqa (i.e. TImode) instead? Perhaps even better would
8712 be if we could determine the real mode of the data, via a hook
8713 into pass_stdarg. Ignore all that for now. */
8714 smode = V4SFmode;
8715 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8716 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8718 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8719 if (max > X86_64_SSE_REGPARM_MAX)
8720 max = X86_64_SSE_REGPARM_MAX;
8722 for (i = cum->sse_regno; i < max; ++i)
8724 mem = plus_constant (Pmode, save_area,
8725 i * 16 + ix86_varargs_gpr_size);
8726 mem = gen_rtx_MEM (smode, mem);
8727 MEM_NOTRAP_P (mem) = 1;
8728 set_mem_alias_set (mem, set);
8729 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8731 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8734 emit_label (label);
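/* Sketch of the register save area laid out above (assuming both parts are
   needed; offsets are relative to the save area base, frame_pointer_rtx
   here):

	[  0 ..  47]  the 6 integer argument registers, one word each
	[ 48 .. 175]  the 8 SSE argument registers, 16 bytes each

   with X86_64_REGPARM_MAX == 6, UNITS_PER_WORD == 8 and
   X86_64_SSE_REGPARM_MAX == 8.  The gp_offset/fp_offset fields set up in
   ix86_va_start below index into this block.  */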
8738 static void
8739 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8741 alias_set_type set = get_varargs_alias_set ();
8742 int i;
8744 /* Reset to zero, as there might have been a sysv va_arg used
8745 before. */
8746 ix86_varargs_gpr_size = 0;
8747 ix86_varargs_fpr_size = 0;
8749 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8751 rtx reg, mem;
8753 mem = gen_rtx_MEM (Pmode,
8754 plus_constant (Pmode, virtual_incoming_args_rtx,
8755 i * UNITS_PER_WORD));
8756 MEM_NOTRAP_P (mem) = 1;
8757 set_mem_alias_set (mem, set);
8759 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8760 emit_move_insn (mem, reg);
8764 static void
8765 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8766 tree type, int *, int no_rtl)
8768 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8769 CUMULATIVE_ARGS next_cum;
8770 tree fntype;
8772 /* This argument doesn't appear to be used anymore. Which is good,
8773 because the old code here didn't suppress rtl generation. */
8774 gcc_assert (!no_rtl);
8776 if (!TARGET_64BIT)
8777 return;
8779 fntype = TREE_TYPE (current_function_decl);
8781 /* For varargs, we do not want to skip the dummy va_dcl argument.
8782 For stdargs, we do want to skip the last named argument. */
8783 next_cum = *cum;
8784 if (stdarg_p (fntype))
8785 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8786 true);
8788 if (cum->call_abi == MS_ABI)
8789 setup_incoming_varargs_ms_64 (&next_cum);
8790 else
8791 setup_incoming_varargs_64 (&next_cum);
8794 static void
8795 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8796 enum machine_mode mode,
8797 tree type,
8798 int *pretend_size ATTRIBUTE_UNUSED,
8799 int no_rtl)
8801 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8802 CUMULATIVE_ARGS next_cum;
8803 tree fntype;
8804 rtx save_area;
8805 int bnd_reg, i, max;
8807 gcc_assert (!no_rtl);
8809 /* Do nothing if we use plain pointer to argument area. */
8810 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8811 return;
8813 fntype = TREE_TYPE (current_function_decl);
8815 /* For varargs, we do not want to skip the dummy va_dcl argument.
8816 For stdargs, we do want to skip the last named argument. */
8817 next_cum = *cum;
8818 if (stdarg_p (fntype))
8819 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8820 true);
8821 save_area = frame_pointer_rtx;
8823 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8824 if (max > X86_64_REGPARM_MAX)
8825 max = X86_64_REGPARM_MAX;
8827 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8828 if (chkp_function_instrumented_p (current_function_decl))
8829 for (i = cum->regno; i < max; i++)
8831 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8832 rtx reg = gen_rtx_REG (DImode,
8833 x86_64_int_parameter_registers[i]);
8834 rtx ptr = reg;
8835 rtx bounds;
8837 if (bnd_reg <= LAST_BND_REG)
8838 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8839 else
8841 rtx ldx_addr =
8842 plus_constant (Pmode, arg_pointer_rtx,
8843 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8844 bounds = gen_reg_rtx (BNDmode);
8845 emit_insn (BNDmode == BND64mode
8846 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8847 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8850 emit_insn (BNDmode == BND64mode
8851 ? gen_bnd64_stx (addr, ptr, bounds)
8852 : gen_bnd32_stx (addr, ptr, bounds));
8854 bnd_reg++;
8859 /* Checks if TYPE is of kind va_list char *. */
8861 static bool
8862 is_va_list_char_pointer (tree type)
8864 tree canonic;
8866 /* For 32-bit it is always true. */
8867 if (!TARGET_64BIT)
8868 return true;
8869 canonic = ix86_canonical_va_list_type (type);
8870 return (canonic == ms_va_list_type_node
8871 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8874 /* Implement va_start. */
8876 static void
8877 ix86_va_start (tree valist, rtx nextarg)
8879 HOST_WIDE_INT words, n_gpr, n_fpr;
8880 tree f_gpr, f_fpr, f_ovf, f_sav;
8881 tree gpr, fpr, ovf, sav, t;
8882 tree type;
8883 rtx ovf_rtx;
8885 if (flag_split_stack
8886 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8888 unsigned int scratch_regno;
8890 /* When we are splitting the stack, we can't refer to the stack
8891 arguments using internal_arg_pointer, because they may be on
8892 the old stack. The split stack prologue will arrange to
8893 leave a pointer to the old stack arguments in a scratch
8894 register, which we here copy to a pseudo-register. The split
8895 stack prologue can't set the pseudo-register directly because
8896 it (the prologue) runs before any registers have been saved. */
8898 scratch_regno = split_stack_prologue_scratch_regno ();
8899 if (scratch_regno != INVALID_REGNUM)
8901 rtx reg;
8902 rtx_insn *seq;
8904 reg = gen_reg_rtx (Pmode);
8905 cfun->machine->split_stack_varargs_pointer = reg;
8907 start_sequence ();
8908 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8909 seq = get_insns ();
8910 end_sequence ();
8912 push_topmost_sequence ();
8913 emit_insn_after (seq, entry_of_function ());
8914 pop_topmost_sequence ();
8918 /* Only 64bit target needs something special. */
8919 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8921 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8922 std_expand_builtin_va_start (valist, nextarg);
8923 else
8925 rtx va_r, next;
8927 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8928 next = expand_binop (ptr_mode, add_optab,
8929 cfun->machine->split_stack_varargs_pointer,
8930 crtl->args.arg_offset_rtx,
8931 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8932 convert_move (va_r, next, 0);
8934 /* Store zero bounds for va_list. */
8935 if (chkp_function_instrumented_p (current_function_decl))
8936 chkp_expand_bounds_reset_for_mem (valist,
8937 make_tree (TREE_TYPE (valist),
8938 next));
8941 return;
8944 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8945 f_fpr = DECL_CHAIN (f_gpr);
8946 f_ovf = DECL_CHAIN (f_fpr);
8947 f_sav = DECL_CHAIN (f_ovf);
8949 valist = build_simple_mem_ref (valist);
8950 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8951 /* The following should be folded into the MEM_REF offset. */
8952 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8953 f_gpr, NULL_TREE);
8954 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8955 f_fpr, NULL_TREE);
8956 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8957 f_ovf, NULL_TREE);
8958 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8959 f_sav, NULL_TREE);
8961 /* Count number of gp and fp argument registers used. */
8962 words = crtl->args.info.words;
8963 n_gpr = crtl->args.info.regno;
8964 n_fpr = crtl->args.info.sse_regno;
8966 if (cfun->va_list_gpr_size)
8968 type = TREE_TYPE (gpr);
8969 t = build2 (MODIFY_EXPR, type,
8970 gpr, build_int_cst (type, n_gpr * 8));
8971 TREE_SIDE_EFFECTS (t) = 1;
8972 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8975 if (TARGET_SSE && cfun->va_list_fpr_size)
8977 type = TREE_TYPE (fpr);
8978 t = build2 (MODIFY_EXPR, type, fpr,
8979 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8980 TREE_SIDE_EFFECTS (t) = 1;
8981 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
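/* Worked example (illustrative): for a declaration like
   `int f (const char *fmt, ...)' the single named pointer argument consumes
   one GPR, so n_gpr == 1 and n_fpr == 0, giving gp_offset == 8 and
   fp_offset == 48 -- i.e. the first va_arg GPR slot is the second word of
   the register save area, and the SSE slots start right after the
   8 * X86_64_REGPARM_MAX == 48 bytes of GPR slots.  */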
8984 /* Find the overflow area. */
8985 type = TREE_TYPE (ovf);
8986 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8987 ovf_rtx = crtl->args.internal_arg_pointer;
8988 else
8989 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8990 t = make_tree (type, ovf_rtx);
8991 if (words != 0)
8992 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8994 /* Store zero bounds for overflow area pointer. */
8995 if (chkp_function_instrumented_p (current_function_decl))
8996 chkp_expand_bounds_reset_for_mem (ovf, t);
8998 t = build2 (MODIFY_EXPR, type, ovf, t);
8999 TREE_SIDE_EFFECTS (t) = 1;
9000 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9002 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
9004 /* Find the register save area.
9005 The function prologue saves it right above the stack frame. */
9006 type = TREE_TYPE (sav);
9007 t = make_tree (type, frame_pointer_rtx);
9008 if (!ix86_varargs_gpr_size)
9009 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
9011 /* Store zero bounds for save area pointer. */
9012 if (chkp_function_instrumented_p (current_function_decl))
9013 chkp_expand_bounds_reset_for_mem (sav, t);
9015 t = build2 (MODIFY_EXPR, type, sav, t);
9016 TREE_SIDE_EFFECTS (t) = 1;
9017 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9021 /* Implement va_arg. */
9023 static tree
9024 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9025 gimple_seq *post_p)
9027 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9028 tree f_gpr, f_fpr, f_ovf, f_sav;
9029 tree gpr, fpr, ovf, sav, t;
9030 int size, rsize;
9031 tree lab_false, lab_over = NULL_TREE;
9032 tree addr, t2;
9033 rtx container;
9034 int indirect_p = 0;
9035 tree ptrtype;
9036 machine_mode nat_mode;
9037 unsigned int arg_boundary;
9039 /* Only 64bit target needs something special. */
9040 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
9041 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
9043 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9044 f_fpr = DECL_CHAIN (f_gpr);
9045 f_ovf = DECL_CHAIN (f_fpr);
9046 f_sav = DECL_CHAIN (f_ovf);
9048 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9049 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
9050 valist = build_va_arg_indirect_ref (valist);
9051 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9052 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9053 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9055 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9056 if (indirect_p)
9057 type = build_pointer_type (type);
9058 size = int_size_in_bytes (type);
9059 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
9061 nat_mode = type_natural_mode (type, NULL, false);
9062 switch (nat_mode)
9064 case V8SFmode:
9065 case V8SImode:
9066 case V32QImode:
9067 case V16HImode:
9068 case V4DFmode:
9069 case V4DImode:
9070 case V16SFmode:
9071 case V16SImode:
9072 case V64QImode:
9073 case V32HImode:
9074 case V8DFmode:
9075 case V8DImode:
9076 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
9077 if (!TARGET_64BIT_MS_ABI)
9079 container = NULL;
9080 break;
9083 default:
9084 container = construct_container (nat_mode, TYPE_MODE (type),
9085 type, 0, X86_64_REGPARM_MAX,
9086 X86_64_SSE_REGPARM_MAX, intreg,
9088 break;
9091 /* Pull the value out of the saved registers. */
9093 addr = create_tmp_var (ptr_type_node, "addr");
9095 if (container)
9097 int needed_intregs, needed_sseregs;
9098 bool need_temp;
9099 tree int_addr, sse_addr;
9101 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9102 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9104 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9106 need_temp = (!REG_P (container)
9107 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9108 || TYPE_ALIGN (type) > 128));
9110 /* In case we are passing a structure, verify that it is a consecutive block
9111 on the register save area. If not, we need to do moves. */
9112 if (!need_temp && !REG_P (container))
9114 /* Verify that all registers are strictly consecutive */
9115 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9117 int i;
9119 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9121 rtx slot = XVECEXP (container, 0, i);
9122 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9123 || INTVAL (XEXP (slot, 1)) != i * 16)
9124 need_temp = true;
9127 else
9129 int i;
9131 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9133 rtx slot = XVECEXP (container, 0, i);
9134 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9135 || INTVAL (XEXP (slot, 1)) != i * 8)
9136 need_temp = true;
9140 if (!need_temp)
9142 int_addr = addr;
9143 sse_addr = addr;
9145 else
9147 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9148 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9151 /* First ensure that we fit completely in registers. */
9152 if (needed_intregs)
9154 t = build_int_cst (TREE_TYPE (gpr),
9155 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9156 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9157 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9158 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9159 gimplify_and_add (t, pre_p);
9161 if (needed_sseregs)
9163 t = build_int_cst (TREE_TYPE (fpr),
9164 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9165 + X86_64_REGPARM_MAX * 8);
9166 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9167 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9168 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9169 gimplify_and_add (t, pre_p);
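/* The two range checks above use simple arithmetic: gp_offset runs from 0 to
   48 in steps of 8 and fp_offset from 48 to 176 in steps of 16.  As a worked
   example, with needed_intregs == 2 the threshold is
   (X86_64_REGPARM_MAX - 2 + 1) * 8 == 40, so gp_offset >= 40 means at most
   one GPR slot remains and we branch to lab_false to take the stack path.  */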
9172 /* Compute index to start of area used for integer regs. */
9173 if (needed_intregs)
9175 /* int_addr = gpr + sav; */
9176 t = fold_build_pointer_plus (sav, gpr);
9177 gimplify_assign (int_addr, t, pre_p);
9179 if (needed_sseregs)
9181 /* sse_addr = fpr + sav; */
9182 t = fold_build_pointer_plus (sav, fpr);
9183 gimplify_assign (sse_addr, t, pre_p);
9185 if (need_temp)
9187 int i, prev_size = 0;
9188 tree temp = create_tmp_var (type, "va_arg_tmp");
9190 /* addr = &temp; */
9191 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9192 gimplify_assign (addr, t, pre_p);
9194 for (i = 0; i < XVECLEN (container, 0); i++)
9196 rtx slot = XVECEXP (container, 0, i);
9197 rtx reg = XEXP (slot, 0);
9198 machine_mode mode = GET_MODE (reg);
9199 tree piece_type;
9200 tree addr_type;
9201 tree daddr_type;
9202 tree src_addr, src;
9203 int src_offset;
9204 tree dest_addr, dest;
9205 int cur_size = GET_MODE_SIZE (mode);
9207 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9208 prev_size = INTVAL (XEXP (slot, 1));
9209 if (prev_size + cur_size > size)
9211 cur_size = size - prev_size;
9212 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9213 if (mode == BLKmode)
9214 mode = QImode;
9216 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9217 if (mode == GET_MODE (reg))
9218 addr_type = build_pointer_type (piece_type);
9219 else
9220 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9221 true);
9222 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9223 true);
9225 if (SSE_REGNO_P (REGNO (reg)))
9227 src_addr = sse_addr;
9228 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9230 else
9232 src_addr = int_addr;
9233 src_offset = REGNO (reg) * 8;
9235 src_addr = fold_convert (addr_type, src_addr);
9236 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9238 dest_addr = fold_convert (daddr_type, addr);
9239 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9240 if (cur_size == GET_MODE_SIZE (mode))
9242 src = build_va_arg_indirect_ref (src_addr);
9243 dest = build_va_arg_indirect_ref (dest_addr);
9245 gimplify_assign (dest, src, pre_p);
9247 else
9249 tree copy
9250 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9251 3, dest_addr, src_addr,
9252 size_int (cur_size));
9253 gimplify_and_add (copy, pre_p);
9255 prev_size += cur_size;
9259 if (needed_intregs)
9261 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9262 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9263 gimplify_assign (gpr, t, pre_p);
9266 if (needed_sseregs)
9268 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9269 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9270 gimplify_assign (fpr, t, pre_p);
9273 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9275 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9278 /* ... otherwise out of the overflow area. */
9280 /* When the caller aligns a parameter on the stack, an alignment beyond
9281 MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
9282 MAX_SUPPORTED_STACK_ALIGNMENT. Match that behaviour here in the
9283 callee. */
9284 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9285 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9286 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9288 /* Care for on-stack alignment if needed. */
9289 if (arg_boundary <= 64 || size == 0)
9290 t = ovf;
9291 else
9293 HOST_WIDE_INT align = arg_boundary / 8;
9294 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9295 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9296 build_int_cst (TREE_TYPE (t), -align));
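/* The else branch above is the usual align-up idiom, t = (ovf + align - 1)
   & -align.  For example, a 32-byte aligned argument (arg_boundary == 256,
   align == 32) with an overflow pointer ending in 0x48 is bumped to the next
   address ending in 0x60.  */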
9299 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9300 gimplify_assign (addr, t, pre_p);
9302 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9303 gimplify_assign (unshare_expr (ovf), t, pre_p);
9305 if (container)
9306 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9308 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9309 addr = fold_convert (ptrtype, addr);
9311 if (indirect_p)
9312 addr = build_va_arg_indirect_ref (addr);
9313 return build_va_arg_indirect_ref (addr);
9316 /* Return true if OPNUM's MEM should be matched
9317 in movabs* patterns. */
9319 bool
9320 ix86_check_movabs (rtx insn, int opnum)
9322 rtx set, mem;
9324 set = PATTERN (insn);
9325 if (GET_CODE (set) == PARALLEL)
9326 set = XVECEXP (set, 0, 0);
9327 gcc_assert (GET_CODE (set) == SET);
9328 mem = XEXP (set, opnum);
9329 while (GET_CODE (mem) == SUBREG)
9330 mem = SUBREG_REG (mem);
9331 gcc_assert (MEM_P (mem));
9332 return volatile_ok || !MEM_VOLATILE_P (mem);
9335 /* Initialize the table of extra 80387 mathematical constants. */
9337 static void
9338 init_ext_80387_constants (void)
9340 static const char * cst[5] =
9342 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9343 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9344 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9345 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9346 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9348 int i;
9350 for (i = 0; i < 5; i++)
9352 real_from_string (&ext_80387_constants_table[i], cst[i]);
9353 /* Ensure each constant is rounded to XFmode precision. */
9354 real_convert (&ext_80387_constants_table[i],
9355 XFmode, &ext_80387_constants_table[i]);
9358 ext_80387_constants_init = 1;
9361 /* Return non-zero if the constant is something that
9362 can be loaded with a special instruction. */
9365 standard_80387_constant_p (rtx x)
9367 machine_mode mode = GET_MODE (x);
9369 REAL_VALUE_TYPE r;
9371 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
9372 return -1;
9374 if (x == CONST0_RTX (mode))
9375 return 1;
9376 if (x == CONST1_RTX (mode))
9377 return 2;
9379 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9381 /* For XFmode constants, try to find a special 80387 instruction when
9382 optimizing for size or on those CPUs that benefit from them. */
9383 if (mode == XFmode
9384 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9386 int i;
9388 if (! ext_80387_constants_init)
9389 init_ext_80387_constants ();
9391 for (i = 0; i < 5; i++)
9392 if (real_identical (&r, &ext_80387_constants_table[i]))
9393 return i + 3;
9396 /* A load of the constant -0.0 or -1.0 will be split into an
9397 fldz;fchs or fld1;fchs sequence. */
9398 if (real_isnegzero (&r))
9399 return 8;
9400 if (real_identical (&r, &dconstm1))
9401 return 9;
9403 return 0;
9406 /* Return the opcode of the special instruction to be used to load
9407 the constant X. */
9409 const char *
9410 standard_80387_constant_opcode (rtx x)
9412 switch (standard_80387_constant_p (x))
9414 case 1:
9415 return "fldz";
9416 case 2:
9417 return "fld1";
9418 case 3:
9419 return "fldlg2";
9420 case 4:
9421 return "fldln2";
9422 case 5:
9423 return "fldl2e";
9424 case 6:
9425 return "fldl2t";
9426 case 7:
9427 return "fldpi";
9428 case 8:
9429 case 9:
9430 return "#";
9431 default:
9432 gcc_unreachable ();
9436 /* Return the CONST_DOUBLE representing the 80387 constant that is
9437 loaded by the specified special instruction. The argument IDX
9438 matches the return value from standard_80387_constant_p. */
9441 standard_80387_constant_rtx (int idx)
9443 int i;
9445 if (! ext_80387_constants_init)
9446 init_ext_80387_constants ();
9448 switch (idx)
9450 case 3:
9451 case 4:
9452 case 5:
9453 case 6:
9454 case 7:
9455 i = idx - 3;
9456 break;
9458 default:
9459 gcc_unreachable ();
9462 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9463 XFmode);
9466 /* Return 1 if X is all 0s and 2 if X is all 1s
9467 in a supported SSE/AVX vector mode. */
9470 standard_sse_constant_p (rtx x)
9472 machine_mode mode;
9474 if (!TARGET_SSE)
9475 return 0;
9477 mode = GET_MODE (x);
9479 if (x == const0_rtx || x == CONST0_RTX (mode))
9480 return 1;
9481 if (vector_all_ones_operand (x, mode))
9482 switch (mode)
9484 case V16QImode:
9485 case V8HImode:
9486 case V4SImode:
9487 case V2DImode:
9488 if (TARGET_SSE2)
9489 return 2;
9490 case V32QImode:
9491 case V16HImode:
9492 case V8SImode:
9493 case V4DImode:
9494 if (TARGET_AVX2)
9495 return 2;
9496 case V64QImode:
9497 case V32HImode:
9498 case V16SImode:
9499 case V8DImode:
9500 if (TARGET_AVX512F)
9501 return 2;
9502 default:
9503 break;
9506 return 0;
9509 /* Return the opcode of the special instruction to be used to load
9510 the constant X. */
9512 const char *
9513 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9515 switch (standard_sse_constant_p (x))
9517 case 1:
9518 switch (get_attr_mode (insn))
9520 case MODE_XI:
9521 return "vpxord\t%g0, %g0, %g0";
9522 case MODE_V16SF:
9523 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9524 : "vpxord\t%g0, %g0, %g0";
9525 case MODE_V8DF:
9526 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9527 : "vpxorq\t%g0, %g0, %g0";
9528 case MODE_TI:
9529 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9530 : "%vpxor\t%0, %d0";
9531 case MODE_V2DF:
9532 return "%vxorpd\t%0, %d0";
9533 case MODE_V4SF:
9534 return "%vxorps\t%0, %d0";
9536 case MODE_OI:
9537 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9538 : "vpxor\t%x0, %x0, %x0";
9539 case MODE_V4DF:
9540 return "vxorpd\t%x0, %x0, %x0";
9541 case MODE_V8SF:
9542 return "vxorps\t%x0, %x0, %x0";
9544 default:
9545 break;
9548 case 2:
9549 if (TARGET_AVX512VL
9550 || get_attr_mode (insn) == MODE_XI
9551 || get_attr_mode (insn) == MODE_V8DF
9552 || get_attr_mode (insn) == MODE_V16SF)
9553 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9554 if (TARGET_AVX)
9555 return "vpcmpeqd\t%0, %0, %0";
9556 else
9557 return "pcmpeqd\t%0, %0";
9559 default:
9560 break;
9562 gcc_unreachable ();
9565 /* Returns true if OP contains a symbol reference */
9567 bool
9568 symbolic_reference_mentioned_p (rtx op)
9570 const char *fmt;
9571 int i;
9573 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9574 return true;
9576 fmt = GET_RTX_FORMAT (GET_CODE (op));
9577 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9579 if (fmt[i] == 'E')
9581 int j;
9583 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9584 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9585 return true;
9588 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9589 return true;
9592 return false;
9595 /* Return true if it is appropriate to emit `ret' instructions in the
9596 body of a function. Do this only if the epilogue is simple, needing a
9597 couple of insns. Prior to reloading, we can't tell how many registers
9598 must be saved, so return false then. Return false if there is no frame
9599 marker to de-allocate. */
9601 bool
9602 ix86_can_use_return_insn_p (void)
9604 struct ix86_frame frame;
9606 if (! reload_completed || frame_pointer_needed)
9607 return 0;
9609 /* Don't allow more than 32k pop, since that's all we can do
9610 with one instruction. */
9611 if (crtl->args.pops_args && crtl->args.size >= 32768)
9612 return 0;
9614 ix86_compute_frame_layout (&frame);
9615 return (frame.stack_pointer_offset == UNITS_PER_WORD
9616 && (frame.nregs + frame.nsseregs) == 0);
9619 /* Value should be nonzero if functions must have frame pointers.
9620 Zero means the frame pointer need not be set up (and parms may
9621 be accessed via the stack pointer) in functions that seem suitable. */
9623 static bool
9624 ix86_frame_pointer_required (void)
9626 /* If we accessed previous frames, then the generated code expects
9627 to be able to access the saved ebp value in our frame. */
9628 if (cfun->machine->accesses_prev_frame)
9629 return true;
9631 /* Several x86 OSes need a frame pointer for other reasons,
9632 usually pertaining to setjmp. */
9633 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9634 return true;
9636 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
9637 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9638 return true;
9640 /* Under Win64 SEH, very large frames need a frame pointer, as the maximum
9641 stack allocation is 4GB. */
9642 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9643 return true;
9645 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9646 turns off the frame pointer by default. Turn it back on now if
9647 we've not got a leaf function. */
9648 if (TARGET_OMIT_LEAF_FRAME_POINTER
9649 && (!crtl->is_leaf
9650 || ix86_current_function_calls_tls_descriptor))
9651 return true;
9653 if (crtl->profile && !flag_fentry)
9654 return true;
9656 return false;
9659 /* Record that the current function accesses previous call frames. */
9661 void
9662 ix86_setup_frame_addresses (void)
9664 cfun->machine->accesses_prev_frame = 1;
9667 #ifndef USE_HIDDEN_LINKONCE
9668 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9669 # define USE_HIDDEN_LINKONCE 1
9670 # else
9671 # define USE_HIDDEN_LINKONCE 0
9672 # endif
9673 #endif
9675 static int pic_labels_used;
9677 /* Fills in the label name that should be used for a pc thunk for
9678 the given register. */
9680 static void
9681 get_pc_thunk_name (char name[32], unsigned int regno)
9683 gcc_assert (!TARGET_64BIT);
9685 if (USE_HIDDEN_LINKONCE)
9686 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9687 else
9688 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
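/* For example, with USE_HIDDEN_LINKONCE the thunk that loads %ebx is named
   "__x86.get_pc_thunk.bx"; otherwise an internal label is generated from the
   "LPR" prefix and the register number.  */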
9692 /* This function generates code for -fpic that loads %ebx with
9693 the return address of the caller and then returns. */
9695 static void
9696 ix86_code_end (void)
9698 rtx xops[2];
9699 int regno;
9701 for (regno = AX_REG; regno <= SP_REG; regno++)
9703 char name[32];
9704 tree decl;
9706 if (!(pic_labels_used & (1 << regno)))
9707 continue;
9709 get_pc_thunk_name (name, regno);
9711 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9712 get_identifier (name),
9713 build_function_type_list (void_type_node, NULL_TREE));
9714 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9715 NULL_TREE, void_type_node);
9716 TREE_PUBLIC (decl) = 1;
9717 TREE_STATIC (decl) = 1;
9718 DECL_IGNORED_P (decl) = 1;
9720 #if TARGET_MACHO
9721 if (TARGET_MACHO)
9723 switch_to_section (darwin_sections[text_coal_section]);
9724 fputs ("\t.weak_definition\t", asm_out_file);
9725 assemble_name (asm_out_file, name);
9726 fputs ("\n\t.private_extern\t", asm_out_file);
9727 assemble_name (asm_out_file, name);
9728 putc ('\n', asm_out_file);
9729 ASM_OUTPUT_LABEL (asm_out_file, name);
9730 DECL_WEAK (decl) = 1;
9732 else
9733 #endif
9734 if (USE_HIDDEN_LINKONCE)
9736 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9738 targetm.asm_out.unique_section (decl, 0);
9739 switch_to_section (get_named_section (decl, NULL, 0));
9741 targetm.asm_out.globalize_label (asm_out_file, name);
9742 fputs ("\t.hidden\t", asm_out_file);
9743 assemble_name (asm_out_file, name);
9744 putc ('\n', asm_out_file);
9745 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9747 else
9749 switch_to_section (text_section);
9750 ASM_OUTPUT_LABEL (asm_out_file, name);
9753 DECL_INITIAL (decl) = make_node (BLOCK);
9754 current_function_decl = decl;
9755 init_function_start (decl);
9756 first_function_block_is_cold = false;
9757 /* Make sure unwind info is emitted for the thunk if needed. */
9758 final_start_function (emit_barrier (), asm_out_file, 1);
9760 /* Pad stack IP move with 4 instructions (two NOPs count
9761 as one instruction). */
9762 if (TARGET_PAD_SHORT_FUNCTION)
9764 int i = 8;
9766 while (i--)
9767 fputs ("\tnop\n", asm_out_file);
9770 xops[0] = gen_rtx_REG (Pmode, regno);
9771 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9772 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9773 output_asm_insn ("%!ret", NULL);
9774 final_end_function ();
9775 init_insn_lengths ();
9776 free_after_compilation (cfun);
9777 set_cfun (NULL);
9778 current_function_decl = NULL;
9781 if (flag_split_stack)
9782 file_end_indicate_split_stack ();
9785 /* Emit code for the SET_GOT patterns. */
9787 const char *
9788 output_set_got (rtx dest, rtx label)
9790 rtx xops[3];
9792 xops[0] = dest;
9794 if (TARGET_VXWORKS_RTP && flag_pic)
9796 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9797 xops[2] = gen_rtx_MEM (Pmode,
9798 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9799 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9801 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9802 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9803 an unadorned address. */
9804 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9805 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9806 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9807 return "";
9810 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9812 if (!flag_pic)
9814 if (TARGET_MACHO)
9815 /* We don't need a pic base, we're not producing pic. */
9816 gcc_unreachable ();
9818 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9819 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9820 targetm.asm_out.internal_label (asm_out_file, "L",
9821 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9823 else
9825 char name[32];
9826 get_pc_thunk_name (name, REGNO (dest));
9827 pic_labels_used |= 1 << REGNO (dest);
9829 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9830 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9831 output_asm_insn ("%!call\t%X2", xops);
9833 #if TARGET_MACHO
9834 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9835 This is what will be referenced by the Mach-O PIC subsystem. */
9836 if (machopic_should_output_picbase_label () || !label)
9837 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9839 /* When we are restoring the pic base at the site of a nonlocal label,
9840 and we decided to emit the pic base above, we will still output a
9841 local label used for calculating the correction offset (even though
9842 the offset will be 0 in that case). */
9843 if (label)
9844 targetm.asm_out.internal_label (asm_out_file, "L",
9845 CODE_LABEL_NUMBER (label));
9846 #endif
9849 if (!TARGET_MACHO)
9850 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9852 return "";
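/* Illustratively, for the common 32-bit PIC case (flag_pic, neither VxWorks
   RTP nor Mach-O) with DEST == %ebx the sequence emitted above is

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   i.e. a call to the thunk generated in ix86_code_end followed by the
   GOT_SYMBOL_NAME addition; the exact spelling depends on the assembler
   dialect.  */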
9855 /* Generate a "push" pattern for input ARG. */
9857 static rtx
9858 gen_push (rtx arg)
9860 struct machine_function *m = cfun->machine;
9862 if (m->fs.cfa_reg == stack_pointer_rtx)
9863 m->fs.cfa_offset += UNITS_PER_WORD;
9864 m->fs.sp_offset += UNITS_PER_WORD;
9866 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9867 arg = gen_rtx_REG (word_mode, REGNO (arg));
9869 return gen_rtx_SET (VOIDmode,
9870 gen_rtx_MEM (word_mode,
9871 gen_rtx_PRE_DEC (Pmode,
9872 stack_pointer_rtx)),
9873 arg);
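/* Roughly, on a 64-bit target gen_push (gen_rtx_REG (DImode, BP_REG)) hands
   back an (as yet unemitted) pattern of the shape

	(set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI bp))

   while bumping the tracked sp_offset (and cfa_offset while the CFA is still
   the stack pointer) by UNITS_PER_WORD.  */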
9876 /* Generate a "pop" pattern for input ARG. */
9878 static rtx
9879 gen_pop (rtx arg)
9881 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9882 arg = gen_rtx_REG (word_mode, REGNO (arg));
9884 return gen_rtx_SET (VOIDmode,
9885 arg,
9886 gen_rtx_MEM (word_mode,
9887 gen_rtx_POST_INC (Pmode,
9888 stack_pointer_rtx)));
9891 /* Return >= 0 if there is an unused call-clobbered register available
9892 for the entire function. */
9894 static unsigned int
9895 ix86_select_alt_pic_regnum (void)
9897 if (ix86_use_pseudo_pic_reg ())
9898 return INVALID_REGNUM;
9900 if (crtl->is_leaf
9901 && !crtl->profile
9902 && !ix86_current_function_calls_tls_descriptor)
9904 int i, drap;
9905 /* Can't use the same register for both PIC and DRAP. */
9906 if (crtl->drap_reg)
9907 drap = REGNO (crtl->drap_reg);
9908 else
9909 drap = -1;
9910 for (i = 2; i >= 0; --i)
9911 if (i != drap && !df_regs_ever_live_p (i))
9912 return i;
9915 return INVALID_REGNUM;
9918 /* Return TRUE if we need to save REGNO. */
9920 static bool
9921 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9923 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9924 && pic_offset_table_rtx)
9926 if (ix86_use_pseudo_pic_reg ())
9928 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
9929 _mcount in prologue. */
9930 if (!TARGET_64BIT && flag_pic && crtl->profile)
9931 return true;
9933 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9934 || crtl->profile
9935 || crtl->calls_eh_return
9936 || crtl->uses_const_pool
9937 || cfun->has_nonlocal_label)
9938 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9941 if (crtl->calls_eh_return && maybe_eh_return)
9943 unsigned i;
9944 for (i = 0; ; i++)
9946 unsigned test = EH_RETURN_DATA_REGNO (i);
9947 if (test == INVALID_REGNUM)
9948 break;
9949 if (test == regno)
9950 return true;
9954 if (crtl->drap_reg
9955 && regno == REGNO (crtl->drap_reg)
9956 && !cfun->machine->no_drap_save_restore)
9957 return true;
9959 return (df_regs_ever_live_p (regno)
9960 && !call_used_regs[regno]
9961 && !fixed_regs[regno]
9962 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9965 /* Return the number of saved general purpose registers. */
9967 static int
9968 ix86_nsaved_regs (void)
9970 int nregs = 0;
9971 int regno;
9973 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9974 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9975 nregs ++;
9976 return nregs;
9979 /* Return the number of saved SSE registers. */
9981 static int
9982 ix86_nsaved_sseregs (void)
9984 int nregs = 0;
9985 int regno;
9987 if (!TARGET_64BIT_MS_ABI)
9988 return 0;
9989 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9990 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9991 nregs ++;
9992 return nregs;
9995 /* Given FROM and TO register numbers, say whether this elimination is
9996 allowed. If stack alignment is needed, we can only replace argument
9997 pointer with hard frame pointer, or replace frame pointer with stack
9998 pointer. Otherwise, frame pointer elimination is automatically
9999 handled and all other eliminations are valid. */
10001 static bool
10002 ix86_can_eliminate (const int from, const int to)
10004 if (stack_realign_fp)
10005 return ((from == ARG_POINTER_REGNUM
10006 && to == HARD_FRAME_POINTER_REGNUM)
10007 || (from == FRAME_POINTER_REGNUM
10008 && to == STACK_POINTER_REGNUM));
10009 else
10010 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
10013 /* Return the offset between two registers, one to be eliminated, and the other
10014 its replacement, at the start of a routine. */
10016 HOST_WIDE_INT
10017 ix86_initial_elimination_offset (int from, int to)
10019 struct ix86_frame frame;
10020 ix86_compute_frame_layout (&frame);
10022 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10023 return frame.hard_frame_pointer_offset;
10024 else if (from == FRAME_POINTER_REGNUM
10025 && to == HARD_FRAME_POINTER_REGNUM)
10026 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
10027 else
10029 gcc_assert (to == STACK_POINTER_REGNUM);
10031 if (from == ARG_POINTER_REGNUM)
10032 return frame.stack_pointer_offset;
10034 gcc_assert (from == FRAME_POINTER_REGNUM);
10035 return frame.stack_pointer_offset - frame.frame_pointer_offset;
10039 /* In a dynamically-aligned function, we can't know the offset from
10040 stack pointer to frame pointer, so we must ensure that setjmp
10041 eliminates fp against the hard fp (%ebp) rather than trying to
10042 index from %esp up to the top of the frame across a gap that is
10043 of unknown (at compile-time) size. */
10044 static rtx
10045 ix86_builtin_setjmp_frame_value (void)
10047 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
10050 /* When using -fsplit-stack, the allocation routines set a field in
10051 the TCB to the bottom of the stack plus this much space, measured
10052 in bytes. */
10054 #define SPLIT_STACK_AVAILABLE 256
10056 /* Fill the ix86_frame structure with information about the frame of the function being compiled. */
10058 static void
10059 ix86_compute_frame_layout (struct ix86_frame *frame)
10061 unsigned HOST_WIDE_INT stack_alignment_needed;
10062 HOST_WIDE_INT offset;
10063 unsigned HOST_WIDE_INT preferred_alignment;
10064 HOST_WIDE_INT size = get_frame_size ();
10065 HOST_WIDE_INT to_allocate;
10067 frame->nregs = ix86_nsaved_regs ();
10068 frame->nsseregs = ix86_nsaved_sseregs ();
10070 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
10071 except in function prologues and in leaf functions. */
10072 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
10073 && (!crtl->is_leaf || cfun->calls_alloca != 0
10074 || ix86_current_function_calls_tls_descriptor))
10076 crtl->preferred_stack_boundary = 128;
10077 crtl->stack_alignment_needed = 128;
10079 /* preferred_stack_boundary is never updated for calls
10080 expanded from a tls descriptor. Update it here. We don't update it in the
10081 expand stage because, according to the comments before
10082 ix86_current_function_calls_tls_descriptor, tls calls may be optimized
10083 away. */
10084 else if (ix86_current_function_calls_tls_descriptor
10085 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
10087 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
10088 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
10089 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
10092 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
10093 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
10095 gcc_assert (!size || stack_alignment_needed);
10096 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
10097 gcc_assert (preferred_alignment <= stack_alignment_needed);
10099 /* For SEH we have to limit the amount of code movement into the prologue.
10100 At present we do this via a BLOCKAGE, at which point there's very little
10101 scheduling that can be done, which means that there's very little point
10102 in doing anything except PUSHs. */
10103 if (TARGET_SEH)
10104 cfun->machine->use_fast_prologue_epilogue = false;
10106 /* During the reload iteration the number of registers saved can change.
10107 Recompute the value as needed. Do not recompute when the number of registers
10108 didn't change, as reload makes multiple calls to this function and does not
10109 expect the decision to change within a single iteration. */
10110 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
10111 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
10113 int count = frame->nregs;
10114 struct cgraph_node *node = cgraph_node::get (current_function_decl);
10116 cfun->machine->use_fast_prologue_epilogue_nregs = count;
10118 /* The fast prologue uses move instead of push to save registers. This
10119 is significantly longer, but also executes faster as modern hardware
10120 can execute the moves in parallel, but can't do that for push/pop.
10122 Be careful about choosing which prologue to emit: when the function takes
10123 many instructions to execute, we may use the slow version, as well as when
10124 the function is known to be outside a hot spot (this is known with
10125 feedback only). Weight the size of the function by the number of registers
10126 to save, as it is cheap to use one or two push instructions but very
10127 slow to use many of them. */
10128 if (count)
10129 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10130 if (node->frequency < NODE_FREQUENCY_NORMAL
10131 || (flag_branch_probabilities
10132 && node->frequency < NODE_FREQUENCY_HOT))
10133 cfun->machine->use_fast_prologue_epilogue = false;
10134 else
10135 cfun->machine->use_fast_prologue_epilogue
10136 = !expensive_function_p (count);
10139 frame->save_regs_using_mov
10140 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10141 /* If static stack checking is enabled and done with probes,
10142 the registers need to be saved before allocating the frame. */
10143 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10145 /* Skip return address. */
10146 offset = UNITS_PER_WORD;
10148 /* Skip pushed static chain. */
10149 if (ix86_static_chain_on_stack)
10150 offset += UNITS_PER_WORD;
10152 /* Skip saved base pointer. */
10153 if (frame_pointer_needed)
10154 offset += UNITS_PER_WORD;
10155 frame->hfp_save_offset = offset;
10157 /* The traditional frame pointer location is at the top of the frame. */
10158 frame->hard_frame_pointer_offset = offset;
10160 /* Register save area */
10161 offset += frame->nregs * UNITS_PER_WORD;
10162 frame->reg_save_offset = offset;
10164 /* On SEH target, registers are pushed just before the frame pointer
10165 location. */
10166 if (TARGET_SEH)
10167 frame->hard_frame_pointer_offset = offset;
10169 /* Align and set SSE register save area. */
10170 if (frame->nsseregs)
10172 /* The only ABI that has saved SSE registers (Win64) also has a
10173 16-byte aligned default stack, and thus we don't need to be
10174 within the re-aligned local stack frame to save them. */
10175 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10176 offset = (offset + 16 - 1) & -16;
10177 offset += frame->nsseregs * 16;
10179 frame->sse_reg_save_offset = offset;
10181 /* The re-aligned stack starts here. Values before this point are not
10182 directly comparable with values below this point. In order to make
10183 sure that no value happens to be the same before and after, force
10184 the alignment computation below to add a non-zero value. */
10185 if (stack_realign_fp)
10186 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10188 /* Va-arg area */
10189 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10190 offset += frame->va_arg_size;
10192 /* Align start of frame for local function. */
10193 if (stack_realign_fp
10194 || offset != frame->sse_reg_save_offset
10195 || size != 0
10196 || !crtl->is_leaf
10197 || cfun->calls_alloca
10198 || ix86_current_function_calls_tls_descriptor)
10199 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10201 /* Frame pointer points here. */
10202 frame->frame_pointer_offset = offset;
10204 offset += size;
10206 /* Add outgoing arguments area. Can be skipped if we eliminated
10207 all the function calls as dead code.
10208 Skipping is, however, impossible when the function calls alloca. The alloca
10209 expander assumes that the last crtl->outgoing_args_size bytes
10210 of the stack frame are unused. */
10211 if (ACCUMULATE_OUTGOING_ARGS
10212 && (!crtl->is_leaf || cfun->calls_alloca
10213 || ix86_current_function_calls_tls_descriptor))
10215 offset += crtl->outgoing_args_size;
10216 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10218 else
10219 frame->outgoing_arguments_size = 0;
10221 /* Align stack boundary. Only needed if we're calling another function
10222 or using alloca. */
10223 if (!crtl->is_leaf || cfun->calls_alloca
10224 || ix86_current_function_calls_tls_descriptor)
10225 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10227 /* We've reached end of stack frame. */
10228 frame->stack_pointer_offset = offset;
10230 /* Size prologue needs to allocate. */
10231 to_allocate = offset - frame->sse_reg_save_offset;
10233 if ((!to_allocate && frame->nregs <= 1)
10234 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10235 frame->save_regs_using_mov = false;
10237 if (ix86_using_red_zone ()
10238 && crtl->sp_is_unchanging
10239 && crtl->is_leaf
10240 && !ix86_current_function_calls_tls_descriptor)
10242 frame->red_zone_size = to_allocate;
10243 if (frame->save_regs_using_mov)
10244 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10245 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10246 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10248 else
10249 frame->red_zone_size = 0;
10250 frame->stack_pointer_offset -= frame->red_zone_size;
10252 /* The SEH frame pointer location is near the bottom of the frame.
10253 This is enforced by the fact that the difference between the
10254 stack pointer and the frame pointer is limited to 240 bytes in
10255 the unwind data structure. */
10256 if (TARGET_SEH)
10258 HOST_WIDE_INT diff;
10260 /* If we can leave the frame pointer where it is, do so. Also, returns
10261 the establisher frame for __builtin_frame_address (0). */
10262 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10263 if (diff <= SEH_MAX_FRAME_SIZE
10264 && (diff > 240 || (diff & 15) != 0)
10265 && !crtl->accesses_prior_frames)
10267 /* Ideally we'd determine what portion of the local stack frame
10268 (within the constraint of the lowest 240) is most heavily used.
10269 But without that complication, simply bias the frame pointer
10270 by 128 bytes so as to maximize the amount of the local stack
10271 frame that is addressable with 8-bit offsets. */
10272 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
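/* Reading aid for the layout computed above, from the CFA downwards:
   return address, optional pushed static chain, optional saved frame
   pointer, GP register save area, aligned SSE register save area, va_arg
   register save area, local variables, outgoing argument area, with any red
   zone carved out below the final stack pointer.  Each *_offset field set
   above records the running distance from the CFA once the corresponding
   area has been skipped; the fields themselves are authoritative.  */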
10277 /* This is semi-inlined memory_address_length, but simplified
10278 since we know that we're always dealing with reg+offset, and
10279 to avoid having to create and discard all that rtl. */
10281 static inline int
10282 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10284 int len = 4;
10286 if (offset == 0)
10288 /* EBP and R13 cannot be encoded without an offset. */
10289 len = (regno == BP_REG || regno == R13_REG);
10291 else if (IN_RANGE (offset, -128, 127))
10292 len = 1;
10294 /* ESP and R12 must be encoded with a SIB byte. */
10295 if (regno == SP_REG || regno == R12_REG)
10296 len++;
10298 return len;
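/* Worked examples for the extra encoding bytes counted above (beyond the
   opcode and ModRM byte):

	0(%rax)   -> 0   base register only
	0(%rbp)   -> 1   EBP/R13 always need a displacement byte
	8(%rsp)   -> 2   disp8 plus the mandatory SIB byte for ESP/R12
	256(%rsp) -> 5   disp32 plus the SIB byte  */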
10301 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10302 The valid base registers are taken from CFUN->MACHINE->FS. */
10304 static rtx
10305 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10307 const struct machine_function *m = cfun->machine;
10308 rtx base_reg = NULL;
10309 HOST_WIDE_INT base_offset = 0;
10311 if (m->use_fast_prologue_epilogue)
10313 /* Choose the base register most likely to allow the most scheduling
10314 opportunities. Generally FP is valid throughout the function,
10315 while DRAP must be reloaded within the epilogue. But choose either
10316 over the SP due to increased encoding size. */
10318 if (m->fs.fp_valid)
10320 base_reg = hard_frame_pointer_rtx;
10321 base_offset = m->fs.fp_offset - cfa_offset;
10323 else if (m->fs.drap_valid)
10325 base_reg = crtl->drap_reg;
10326 base_offset = 0 - cfa_offset;
10328 else if (m->fs.sp_valid)
10330 base_reg = stack_pointer_rtx;
10331 base_offset = m->fs.sp_offset - cfa_offset;
10334 else
10336 HOST_WIDE_INT toffset;
10337 int len = 16, tlen;
10339 /* Choose the base register with the smallest address encoding.
10340 With a tie, choose FP > DRAP > SP. */
10341 if (m->fs.sp_valid)
10343 base_reg = stack_pointer_rtx;
10344 base_offset = m->fs.sp_offset - cfa_offset;
10345 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10347 if (m->fs.drap_valid)
10349 toffset = 0 - cfa_offset;
10350 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10351 if (tlen <= len)
10353 base_reg = crtl->drap_reg;
10354 base_offset = toffset;
10355 len = tlen;
10358 if (m->fs.fp_valid)
10360 toffset = m->fs.fp_offset - cfa_offset;
10361 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10362 if (tlen <= len)
10364 base_reg = hard_frame_pointer_rtx;
10365 base_offset = toffset;
10366 len = tlen;
10370 gcc_assert (base_reg != NULL);
10372 return plus_constant (Pmode, base_reg, base_offset);
10375 /* Emit code to save registers in the prologue. */
10377 static void
10378 ix86_emit_save_regs (void)
10380 unsigned int regno;
10381 rtx_insn *insn;
10383 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10384 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10386 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10387 RTX_FRAME_RELATED_P (insn) = 1;
10391 /* Emit a single register save at CFA - CFA_OFFSET. */
10393 static void
10394 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10395 HOST_WIDE_INT cfa_offset)
10397 struct machine_function *m = cfun->machine;
10398 rtx reg = gen_rtx_REG (mode, regno);
10399 rtx mem, addr, base, insn;
10401 addr = choose_baseaddr (cfa_offset);
10402 mem = gen_frame_mem (mode, addr);
10404 /* For SSE saves, we need to indicate the 128-bit alignment. */
10405 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10407 insn = emit_move_insn (mem, reg);
10408 RTX_FRAME_RELATED_P (insn) = 1;
10410 base = addr;
10411 if (GET_CODE (base) == PLUS)
10412 base = XEXP (base, 0);
10413 gcc_checking_assert (REG_P (base));
10415 /* When saving registers into a re-aligned local stack frame, avoid
10416 any tricky guessing by dwarf2out. */
10417 if (m->fs.realigned)
10419 gcc_checking_assert (stack_realign_drap);
10421 if (regno == REGNO (crtl->drap_reg))
10423 /* A bit of a hack. We force the DRAP register to be saved in
10424 the re-aligned stack frame, which provides us with a copy
10425 of the CFA that will last past the prologue. Install it. */
10426 gcc_checking_assert (cfun->machine->fs.fp_valid);
10427 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10428 cfun->machine->fs.fp_offset - cfa_offset);
10429 mem = gen_rtx_MEM (mode, addr);
10430 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10432 else
10434 /* The frame pointer is a stable reference within the
10435 aligned frame. Use it. */
10436 gcc_checking_assert (cfun->machine->fs.fp_valid);
10437 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10438 cfun->machine->fs.fp_offset - cfa_offset);
10439 mem = gen_rtx_MEM (mode, addr);
10440 add_reg_note (insn, REG_CFA_EXPRESSION,
10441 gen_rtx_SET (VOIDmode, mem, reg));
10445 /* The memory may not be relative to the current CFA register,
10446 which means that we may need to generate a new pattern for
10447 use by the unwind info. */
10448 else if (base != m->fs.cfa_reg)
10450 addr = plus_constant (Pmode, m->fs.cfa_reg,
10451 m->fs.cfa_offset - cfa_offset);
10452 mem = gen_rtx_MEM (mode, addr);
10453 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10457 /* Emit code to save registers using MOV insns.
10458 First register is stored at CFA - CFA_OFFSET. */
10459 static void
10460 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10462 unsigned int regno;
10464 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10465 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10467 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10468 cfa_offset -= UNITS_PER_WORD;
10472 /* Emit code to save SSE registers using MOV insns.
10473 First register is stored at CFA - CFA_OFFSET. */
10474 static void
10475 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10477 unsigned int regno;
10479 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10480 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10482 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10483 cfa_offset -= 16;
10487 static GTY(()) rtx queued_cfa_restores;
10489 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
10490 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10491 Don't add the note if the previously saved value will be left untouched
10492 within the stack red-zone until return, as unwinders can find the same
10493 value in the register and on the stack. */
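/* For instance, on x86-64, where the red zone is normally the
   RED_ZONE_SIZE (128) bytes below the stack pointer, a save slot that
   stays within that area until the return needs no REG_CFA_RESTORE note:
   the unwinder can still read the value from its stack slot. */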
10495 static void
10496 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
10498 if (!crtl->shrink_wrapped
10499 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10500 return;
10502 if (insn)
10504 add_reg_note (insn, REG_CFA_RESTORE, reg);
10505 RTX_FRAME_RELATED_P (insn) = 1;
10507 else
10508 queued_cfa_restores
10509 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10512 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10514 static void
10515 ix86_add_queued_cfa_restore_notes (rtx insn)
10517 rtx last;
10518 if (!queued_cfa_restores)
10519 return;
10520 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10522 XEXP (last, 1) = REG_NOTES (insn);
10523 REG_NOTES (insn) = queued_cfa_restores;
10524 queued_cfa_restores = NULL_RTX;
10525 RTX_FRAME_RELATED_P (insn) = 1;
10528 /* Expand prologue or epilogue stack adjustment.
10529 The pattern exists to put a dependency on all ebp-based memory accesses.
10530 STYLE should be negative if instructions should be marked as frame related,
10531 zero if the %r11 register is live and cannot be freely used, and positive
10532 otherwise. */
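/* A representative prologue call, as used further down in this file, is:

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                GEN_INT (-allocate), -1,
                                m->fs.cfa_reg == stack_pointer_rtx);

   i.e. a negative STYLE so the insn is marked frame related, and SET_CFA
   true only while the stack pointer is still the CFA register. */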
10534 static void
10535 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10536 int style, bool set_cfa)
10538 struct machine_function *m = cfun->machine;
10539 rtx insn;
10540 bool add_frame_related_expr = false;
10542 if (Pmode == SImode)
10543 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10544 else if (x86_64_immediate_operand (offset, DImode))
10545 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10546 else
10548 rtx tmp;
10549 /* r11 is used by indirect sibcall return as well, set before the
10550 epilogue and used after the epilogue. */
10551 if (style)
10552 tmp = gen_rtx_REG (DImode, R11_REG);
10553 else
10555 gcc_assert (src != hard_frame_pointer_rtx
10556 && dest != hard_frame_pointer_rtx);
10557 tmp = hard_frame_pointer_rtx;
10559 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10560 if (style < 0)
10561 add_frame_related_expr = true;
10563 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10566 insn = emit_insn (insn);
10567 if (style >= 0)
10568 ix86_add_queued_cfa_restore_notes (insn);
10570 if (set_cfa)
10572 rtx r;
10574 gcc_assert (m->fs.cfa_reg == src);
10575 m->fs.cfa_offset += INTVAL (offset);
10576 m->fs.cfa_reg = dest;
10578 r = gen_rtx_PLUS (Pmode, src, offset);
10579 r = gen_rtx_SET (VOIDmode, dest, r);
10580 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10581 RTX_FRAME_RELATED_P (insn) = 1;
10583 else if (style < 0)
10585 RTX_FRAME_RELATED_P (insn) = 1;
10586 if (add_frame_related_expr)
10588 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10589 r = gen_rtx_SET (VOIDmode, dest, r);
10590 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10594 if (dest == stack_pointer_rtx)
10596 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10597 bool valid = m->fs.sp_valid;
10599 if (src == hard_frame_pointer_rtx)
10601 valid = m->fs.fp_valid;
10602 ooffset = m->fs.fp_offset;
10604 else if (src == crtl->drap_reg)
10606 valid = m->fs.drap_valid;
10607 ooffset = 0;
10609 else
10611 /* Else there are two possibilities: SP itself, which we set
10612 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10613 taken care of by hand along the eh_return path. */
10614 gcc_checking_assert (src == stack_pointer_rtx
10615 || offset == const0_rtx);
10618 m->fs.sp_offset = ooffset - INTVAL (offset);
10619 m->fs.sp_valid = valid;
10623 /* Find an available register to be used as dynamic realign argument
10624 pointer register. Such a register will be written in the prologue and
10625 used at the beginning of the body, so it must not be
10626 1. parameter passing register.
10627 2. GOT pointer.
10628 We reuse the static-chain register if it is available. Otherwise, we
10629 use DI for i386 and R13 for x86-64. We chose R13 since it has
10630 shorter encoding.
10632 Return: the regno of chosen register. */
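/* For example, under these rules a 64-bit function that needs the static
   chain (or emits a tail call) gets R13, an ordinary 64-bit function gets
   R10, and a 32-bit function with regparm <= 2 and neither fastcall nor
   thiscall can usually reuse CX. */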
10634 static unsigned int
10635 find_drap_reg (void)
10637 tree decl = cfun->decl;
10639 if (TARGET_64BIT)
10641 /* Use R13 for a nested function or a function that needs a static chain.
10642 Since a function with a tail call may use any caller-saved
10643 register in the epilogue, the DRAP must not use a caller-saved
10644 register in that case. */
10645 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10646 return R13_REG;
10648 return R10_REG;
10650 else
10652 /* Use DI for a nested function or a function that needs a static chain.
10653 Since a function with a tail call may use any caller-saved
10654 register in the epilogue, the DRAP must not use a caller-saved
10655 register in that case. */
10656 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10657 return DI_REG;
10659 /* Reuse static chain register if it isn't used for parameter
10660 passing. */
10661 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10663 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10664 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10665 return CX_REG;
10667 return DI_REG;
10671 /* Return minimum incoming stack alignment. */
10673 static unsigned int
10674 ix86_minimum_incoming_stack_boundary (bool sibcall)
10676 unsigned int incoming_stack_boundary;
10678 /* Prefer the one specified at command line. */
10679 if (ix86_user_incoming_stack_boundary)
10680 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10681 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10682 if -mstackrealign is used, this isn't a sibcall check, and the
10683 estimated stack alignment is 128 bits. */
10684 else if (!sibcall
10685 && !TARGET_64BIT
10686 && ix86_force_align_arg_pointer
10687 && crtl->stack_alignment_estimated == 128)
10688 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10689 else
10690 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10692 /* Incoming stack alignment can be changed on individual functions
10693 via force_align_arg_pointer attribute. We use the smallest
10694 incoming stack boundary. */
10695 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10696 && lookup_attribute (ix86_force_align_arg_pointer_string,
10697 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10698 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10700 /* The incoming stack frame has to be aligned at least at
10701 parm_stack_boundary. */
10702 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10703 incoming_stack_boundary = crtl->parm_stack_boundary;
10705 /* The stack at the entry to main is aligned by the runtime. We use
10706 the smallest incoming stack boundary. */
10707 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10708 && DECL_NAME (current_function_decl)
10709 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10710 && DECL_FILE_SCOPE_P (current_function_decl))
10711 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10713 return incoming_stack_boundary;
10716 /* Update incoming stack boundary and estimated stack alignment. */
10718 static void
10719 ix86_update_stack_boundary (void)
10721 ix86_incoming_stack_boundary
10722 = ix86_minimum_incoming_stack_boundary (false);
10724 /* x86_64 varargs functions need 16-byte stack alignment for the
10725 register save area. */
10726 if (TARGET_64BIT
10727 && cfun->stdarg
10728 && crtl->stack_alignment_estimated < 128)
10729 crtl->stack_alignment_estimated = 128;
10732 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10733 needed or an rtx for DRAP otherwise. */
10735 static rtx
10736 ix86_get_drap_rtx (void)
10738 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10739 crtl->need_drap = true;
10741 if (stack_realign_drap)
10743 /* Assign DRAP to vDRAP and return vDRAP. */
10744 unsigned int regno = find_drap_reg ();
10745 rtx drap_vreg;
10746 rtx arg_ptr;
10747 rtx_insn *seq, *insn;
10749 arg_ptr = gen_rtx_REG (Pmode, regno);
10750 crtl->drap_reg = arg_ptr;
10752 start_sequence ();
10753 drap_vreg = copy_to_reg (arg_ptr);
10754 seq = get_insns ();
10755 end_sequence ();
10757 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10758 if (!optimize)
10760 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10761 RTX_FRAME_RELATED_P (insn) = 1;
10763 return drap_vreg;
10765 else
10766 return NULL;
10769 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10771 static rtx
10772 ix86_internal_arg_pointer (void)
10774 return virtual_incoming_args_rtx;
10777 struct scratch_reg {
10778 rtx reg;
10779 bool saved;
10782 /* Return a short-lived scratch register for use on function entry.
10783 In 32-bit mode, it is valid only after the registers are saved
10784 in the prologue. This register must be released by means of
10785 release_scratch_register_on_entry once it is dead. */
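/* The expected usage pattern, as in ix86_adjust_stack_and_probe below, is:

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... use sr.reg ...
     release_scratch_register_on_entry (&sr);  */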
10787 static void
10788 get_scratch_register_on_entry (struct scratch_reg *sr)
10790 int regno;
10792 sr->saved = false;
10794 if (TARGET_64BIT)
10796 /* We always use R11 in 64-bit mode. */
10797 regno = R11_REG;
10799 else
10801 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10802 bool fastcall_p
10803 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10804 bool thiscall_p
10805 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10806 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10807 int regparm = ix86_function_regparm (fntype, decl);
10808 int drap_regno
10809 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10811 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10812 for the static chain register. */
10813 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10814 && drap_regno != AX_REG)
10815 regno = AX_REG;
10816 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10817 for the static chain register. */
10818 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10819 regno = AX_REG;
10820 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10821 regno = DX_REG;
10822 /* ecx is the static chain register. */
10823 else if (regparm < 3 && !fastcall_p && !thiscall_p
10824 && !static_chain_p
10825 && drap_regno != CX_REG)
10826 regno = CX_REG;
10827 else if (ix86_save_reg (BX_REG, true))
10828 regno = BX_REG;
10829 /* esi is the static chain register. */
10830 else if (!(regparm == 3 && static_chain_p)
10831 && ix86_save_reg (SI_REG, true))
10832 regno = SI_REG;
10833 else if (ix86_save_reg (DI_REG, true))
10834 regno = DI_REG;
10835 else
10837 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10838 sr->saved = true;
10842 sr->reg = gen_rtx_REG (Pmode, regno);
10843 if (sr->saved)
10845 rtx_insn *insn = emit_insn (gen_push (sr->reg));
10846 RTX_FRAME_RELATED_P (insn) = 1;
10850 /* Release a scratch register obtained from the preceding function. */
10852 static void
10853 release_scratch_register_on_entry (struct scratch_reg *sr)
10855 if (sr->saved)
10857 struct machine_function *m = cfun->machine;
10858 rtx x, insn = emit_insn (gen_pop (sr->reg));
10860 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10861 RTX_FRAME_RELATED_P (insn) = 1;
10862 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10863 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10864 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10865 m->fs.sp_offset -= UNITS_PER_WORD;
10869 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
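/* With the usual STACK_CHECK_PROBE_INTERVAL_EXP of 12, PROBE_INTERVAL is
   4096 bytes, i.e. one probe per page. */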
10871 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10873 static void
10874 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10876 /* We skip the probe for the first interval + a small dope of 4 words and
10877 probe that many bytes past the specified size to maintain a protection
10878 area at the bottom of the stack. */
10879 const int dope = 4 * UNITS_PER_WORD;
10880 rtx size_rtx = GEN_INT (size), last;
10882 /* See if we have a constant small number of probes to generate. If so,
10883 that's the easy case. The run-time loop is made up of 11 insns in the
10884 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10885 for n # of intervals. */
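/* As a worked example, assuming PROBE_INTERVAL is 4096: a 3-interval
   (12288 byte) allocation is under the 5 * PROBE_INTERVAL limit and
   expands inline to 3 + 2*(3-1) = 7 insns, beating the 11-insn run-time
   loop, whereas a 6-interval allocation would need 13 insns inline and
   is emitted as the loop instead. */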
10886 if (size <= 5 * PROBE_INTERVAL)
10888 HOST_WIDE_INT i, adjust;
10889 bool first_probe = true;
10891 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10892 values of N from 1 until it exceeds SIZE. If only one probe is
10893 needed, this will not generate any code. Then adjust and probe
10894 to PROBE_INTERVAL + SIZE. */
10895 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10897 if (first_probe)
10899 adjust = 2 * PROBE_INTERVAL + dope;
10900 first_probe = false;
10902 else
10903 adjust = PROBE_INTERVAL;
10905 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10906 plus_constant (Pmode, stack_pointer_rtx,
10907 -adjust)));
10908 emit_stack_probe (stack_pointer_rtx);
10911 if (first_probe)
10912 adjust = size + PROBE_INTERVAL + dope;
10913 else
10914 adjust = size + PROBE_INTERVAL - i;
10916 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10917 plus_constant (Pmode, stack_pointer_rtx,
10918 -adjust)));
10919 emit_stack_probe (stack_pointer_rtx);
10921 /* Adjust back to account for the additional first interval. */
10922 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10923 plus_constant (Pmode, stack_pointer_rtx,
10924 PROBE_INTERVAL + dope)));
10927 /* Otherwise, do the same as above, but in a loop. Note that we must be
10928 extra careful with variables wrapping around because we might be at
10929 the very top (or the very bottom) of the address space and we have
10930 to be able to handle this case properly; in particular, we use an
10931 equality test for the loop condition. */
10932 else
10934 HOST_WIDE_INT rounded_size;
10935 struct scratch_reg sr;
10937 get_scratch_register_on_entry (&sr);
10940 /* Step 1: round SIZE to the previous multiple of the interval. */
10942 rounded_size = size & -PROBE_INTERVAL;
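/* E.g., assuming PROBE_INTERVAL is 4096, a SIZE of 10000 yields
   rounded_size = 10000 & -4096 = 8192; the remaining 1808 bytes are
   handled by the extra probe in step 4 below. */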
10945 /* Step 2: compute initial and final value of the loop counter. */
10947 /* SP = SP_0 + PROBE_INTERVAL. */
10948 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10949 plus_constant (Pmode, stack_pointer_rtx,
10950 - (PROBE_INTERVAL + dope))));
10952 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10953 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10954 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10955 gen_rtx_PLUS (Pmode, sr.reg,
10956 stack_pointer_rtx)));
10959 /* Step 3: the loop
10961 while (SP != LAST_ADDR)
10963 SP = SP + PROBE_INTERVAL
10964 probe at SP
10967 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10968 values of N from 1 until it is equal to ROUNDED_SIZE. */
10970 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10973 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10974 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10976 if (size != rounded_size)
10978 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10979 plus_constant (Pmode, stack_pointer_rtx,
10980 rounded_size - size)));
10981 emit_stack_probe (stack_pointer_rtx);
10984 /* Adjust back to account for the additional first interval. */
10985 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10986 plus_constant (Pmode, stack_pointer_rtx,
10987 PROBE_INTERVAL + dope)));
10989 release_scratch_register_on_entry (&sr);
10992 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10994 /* Even if the stack pointer isn't the CFA register, we need to correctly
10995 describe the adjustments made to it, in particular differentiate the
10996 frame-related ones from the frame-unrelated ones. */
10997 if (size > 0)
10999 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
11000 XVECEXP (expr, 0, 0)
11001 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11002 plus_constant (Pmode, stack_pointer_rtx, -size));
11003 XVECEXP (expr, 0, 1)
11004 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11005 plus_constant (Pmode, stack_pointer_rtx,
11006 PROBE_INTERVAL + dope + size));
11007 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
11008 RTX_FRAME_RELATED_P (last) = 1;
11010 cfun->machine->fs.sp_offset += size;
11013 /* Make sure nothing is scheduled before we are done. */
11014 emit_insn (gen_blockage ());
11017 /* Adjust the stack pointer up to REG while probing it. */
11019 const char *
11020 output_adjust_stack_and_probe (rtx reg)
11022 static int labelno = 0;
11023 char loop_lab[32], end_lab[32];
11024 rtx xops[2];
11026 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11027 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11029 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11031 /* Jump to END_LAB if SP == LAST_ADDR. */
11032 xops[0] = stack_pointer_rtx;
11033 xops[1] = reg;
11034 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11035 fputs ("\tje\t", asm_out_file);
11036 assemble_name_raw (asm_out_file, end_lab);
11037 fputc ('\n', asm_out_file);
11039 /* SP = SP + PROBE_INTERVAL. */
11040 xops[1] = GEN_INT (PROBE_INTERVAL);
11041 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11043 /* Probe at SP. */
11044 xops[1] = const0_rtx;
11045 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
11047 fprintf (asm_out_file, "\tjmp\t");
11048 assemble_name_raw (asm_out_file, loop_lab);
11049 fputc ('\n', asm_out_file);
11051 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11053 return "";
11056 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
11057 inclusive. These are offsets from the current stack pointer. */
11059 static void
11060 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
11062 /* See if we have a constant small number of probes to generate. If so,
11063 that's the easy case. The run-time loop is made up of 7 insns in the
11064 generic case while the compile-time loop is made up of n insns for n #
11065 of intervals. */
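/* For instance, assuming PROBE_INTERVAL is 4096, probing 3 intervals
   costs only 3 inline probe insns, well under the 7-insn run-time loop,
   while a 10-interval (40960 byte) range exceeds 7 * PROBE_INTERVAL and
   uses the loop form below. */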
11066 if (size <= 7 * PROBE_INTERVAL)
11068 HOST_WIDE_INT i;
11070 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
11071 it exceeds SIZE. If only one probe is needed, this will not
11072 generate any code. Then probe at FIRST + SIZE. */
11073 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11074 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11075 -(first + i)));
11077 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11078 -(first + size)));
11081 /* Otherwise, do the same as above, but in a loop. Note that we must be
11082 extra careful with variables wrapping around because we might be at
11083 the very top (or the very bottom) of the address space and we have
11084 to be able to handle this case properly; in particular, we use an
11085 equality test for the loop condition. */
11086 else
11088 HOST_WIDE_INT rounded_size, last;
11089 struct scratch_reg sr;
11091 get_scratch_register_on_entry (&sr);
11094 /* Step 1: round SIZE to the previous multiple of the interval. */
11096 rounded_size = size & -PROBE_INTERVAL;
11099 /* Step 2: compute initial and final value of the loop counter. */
11101 /* TEST_OFFSET = FIRST. */
11102 emit_move_insn (sr.reg, GEN_INT (-first));
11104 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
11105 last = first + rounded_size;
11108 /* Step 3: the loop
11110 while (TEST_ADDR != LAST_ADDR)
11112 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
11113 probe at TEST_ADDR
11116 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
11117 until it is equal to ROUNDED_SIZE. */
11119 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
11122 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
11123 that SIZE is equal to ROUNDED_SIZE. */
11125 if (size != rounded_size)
11126 emit_stack_probe (plus_constant (Pmode,
11127 gen_rtx_PLUS (Pmode,
11128 stack_pointer_rtx,
11129 sr.reg),
11130 rounded_size - size));
11132 release_scratch_register_on_entry (&sr);
11135 /* Make sure nothing is scheduled before we are done. */
11136 emit_insn (gen_blockage ());
11139 /* Probe a range of stack addresses from REG to END, inclusive. These are
11140 offsets from the current stack pointer. */
11142 const char *
11143 output_probe_stack_range (rtx reg, rtx end)
11145 static int labelno = 0;
11146 char loop_lab[32], end_lab[32];
11147 rtx xops[3];
11149 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11150 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11152 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11154 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11155 xops[0] = reg;
11156 xops[1] = end;
11157 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11158 fputs ("\tje\t", asm_out_file);
11159 assemble_name_raw (asm_out_file, end_lab);
11160 fputc ('\n', asm_out_file);
11162 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11163 xops[1] = GEN_INT (PROBE_INTERVAL);
11164 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11166 /* Probe at TEST_ADDR. */
11167 xops[0] = stack_pointer_rtx;
11168 xops[1] = reg;
11169 xops[2] = const0_rtx;
11170 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11172 fprintf (asm_out_file, "\tjmp\t");
11173 assemble_name_raw (asm_out_file, loop_lab);
11174 fputc ('\n', asm_out_file);
11176 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11178 return "";
11181 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
11182 to be generated in correct form. */
11183 static void
11184 ix86_finalize_stack_realign_flags (void)
11186 /* Check whether stack realignment is really needed after reload, and
11187 store the result in cfun. */
11188 unsigned int incoming_stack_boundary
11189 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11190 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11191 unsigned int stack_realign = (incoming_stack_boundary
11192 < (crtl->is_leaf
11193 ? crtl->max_used_stack_slot_alignment
11194 : crtl->stack_alignment_needed));
11196 if (crtl->stack_realign_finalized)
11198 /* After stack_realign_needed is finalized, we can no longer
11199 change it. */
11200 gcc_assert (crtl->stack_realign_needed == stack_realign);
11201 return;
11204 /* If the only reason for frame_pointer_needed is that we conservatively
11205 assumed stack realignment might be needed, but in the end nothing that
11206 needed the stack alignment had been spilled, clear frame_pointer_needed
11207 and say we don't need stack realignment. */
11208 if (stack_realign
11209 && frame_pointer_needed
11210 && crtl->is_leaf
11211 && flag_omit_frame_pointer
11212 && crtl->sp_is_unchanging
11213 && !ix86_current_function_calls_tls_descriptor
11214 && !crtl->accesses_prior_frames
11215 && !cfun->calls_alloca
11216 && !crtl->calls_eh_return
11217 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11218 && !ix86_frame_pointer_required ()
11219 && get_frame_size () == 0
11220 && ix86_nsaved_sseregs () == 0
11221 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11223 HARD_REG_SET set_up_by_prologue, prologue_used;
11224 basic_block bb;
11226 CLEAR_HARD_REG_SET (prologue_used);
11227 CLEAR_HARD_REG_SET (set_up_by_prologue);
11228 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11229 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11230 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11231 HARD_FRAME_POINTER_REGNUM);
11232 FOR_EACH_BB_FN (bb, cfun)
11234 rtx_insn *insn;
11235 FOR_BB_INSNS (bb, insn)
11236 if (NONDEBUG_INSN_P (insn)
11237 && requires_stack_frame_p (insn, prologue_used,
11238 set_up_by_prologue))
11240 crtl->stack_realign_needed = stack_realign;
11241 crtl->stack_realign_finalized = true;
11242 return;
11246 /* If drap has been set, but it actually isn't live at the start
11247 of the function, there is no reason to set it up. */
11248 if (crtl->drap_reg)
11250 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11251 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11253 crtl->drap_reg = NULL_RTX;
11254 crtl->need_drap = false;
11257 else
11258 cfun->machine->no_drap_save_restore = true;
11260 frame_pointer_needed = false;
11261 stack_realign = false;
11262 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11263 crtl->stack_alignment_needed = incoming_stack_boundary;
11264 crtl->stack_alignment_estimated = incoming_stack_boundary;
11265 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11266 crtl->preferred_stack_boundary = incoming_stack_boundary;
11267 df_finish_pass (true);
11268 df_scan_alloc (NULL);
11269 df_scan_blocks ();
11270 df_compute_regs_ever_live (true);
11271 df_analyze ();
11274 crtl->stack_realign_needed = stack_realign;
11275 crtl->stack_realign_finalized = true;
11278 /* Delete SET_GOT right after entry block if it is allocated to reg. */
11280 static void
11281 ix86_elim_entry_set_got (rtx reg)
11283 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11284 rtx_insn *c_insn = BB_HEAD (bb);
11285 if (!NONDEBUG_INSN_P (c_insn))
11286 c_insn = next_nonnote_nondebug_insn (c_insn);
11287 if (c_insn && NONJUMP_INSN_P (c_insn))
11289 rtx pat = PATTERN (c_insn);
11290 if (GET_CODE (pat) == PARALLEL)
11292 rtx vec = XVECEXP (pat, 0, 0);
11293 if (GET_CODE (vec) == SET
11294 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11295 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11296 delete_insn (c_insn);
11301 /* Expand the prologue into a bunch of separate insns. */
11303 void
11304 ix86_expand_prologue (void)
11306 struct machine_function *m = cfun->machine;
11307 rtx insn, t;
11308 struct ix86_frame frame;
11309 HOST_WIDE_INT allocate;
11310 bool int_registers_saved;
11311 bool sse_registers_saved;
11313 ix86_finalize_stack_realign_flags ();
11315 /* DRAP should not coexist with stack_realign_fp */
11316 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11318 memset (&m->fs, 0, sizeof (m->fs));
11320 /* Initialize CFA state for before the prologue. */
11321 m->fs.cfa_reg = stack_pointer_rtx;
11322 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11324 /* Track SP offset to the CFA. We continue tracking this after we've
11325 swapped the CFA register away from SP. In the case of re-alignment
11326 this is fudged; we're interested in offsets within the local frame. */
11327 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11328 m->fs.sp_valid = true;
11330 ix86_compute_frame_layout (&frame);
11332 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11334 /* We should have already generated an error for any use of
11335 ms_hook on a nested function. */
11336 gcc_checking_assert (!ix86_static_chain_on_stack);
11338 /* Check whether profiling is active and we shall use the
11339 profiling-before-prologue variant. If so, issue a sorry. */
11340 if (crtl->profile && flag_fentry != 0)
11341 sorry ("ms_hook_prologue attribute isn%'t compatible "
11342 "with -mfentry for 32-bit");
11344 /* In ix86_asm_output_function_label we emitted:
11345 8b ff movl.s %edi,%edi
11346 55 push %ebp
11347 8b ec movl.s %esp,%ebp
11349 This matches the hookable function prologue in Win32 API
11350 functions in Microsoft Windows XP Service Pack 2 and newer.
11351 Wine uses this to enable Windows apps to hook the Win32 API
11352 functions provided by Wine.
11354 What that means is that we've already set up the frame pointer. */
11356 if (frame_pointer_needed
11357 && !(crtl->drap_reg && crtl->stack_realign_needed))
11359 rtx push, mov;
11361 /* We've decided to use the frame pointer already set up.
11362 Describe this to the unwinder by pretending that both
11363 push and mov insns happen right here.
11365 Putting the unwind info here at the end of the ms_hook
11366 is done so that we can make absolutely certain we get
11367 the required byte sequence at the start of the function,
11368 rather than relying on an assembler that can produce
11369 the exact encoding required.
11371 However it does mean (in the unpatched case) that we have
11372 a 1 insn window where the asynchronous unwind info is
11373 incorrect. However, if we placed the unwind info at
11374 its correct location we would have incorrect unwind info
11375 in the patched case. Which is probably all moot since
11376 I don't expect Wine generates dwarf2 unwind info for the
11377 system libraries that use this feature. */
11379 insn = emit_insn (gen_blockage ());
11381 push = gen_push (hard_frame_pointer_rtx);
11382 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
11383 stack_pointer_rtx);
11384 RTX_FRAME_RELATED_P (push) = 1;
11385 RTX_FRAME_RELATED_P (mov) = 1;
11387 RTX_FRAME_RELATED_P (insn) = 1;
11388 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11389 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11391 /* Note that gen_push incremented m->fs.cfa_offset, even
11392 though we didn't emit the push insn here. */
11393 m->fs.cfa_reg = hard_frame_pointer_rtx;
11394 m->fs.fp_offset = m->fs.cfa_offset;
11395 m->fs.fp_valid = true;
11397 else
11399 /* The frame pointer is not needed so pop %ebp again.
11400 This leaves us with a pristine state. */
11401 emit_insn (gen_pop (hard_frame_pointer_rtx));
11405 /* The first insn of a function that accepts its static chain on the
11406 stack is to push the register that would be filled in by a direct
11407 call. This insn will be skipped by the trampoline. */
11408 else if (ix86_static_chain_on_stack)
11410 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11411 emit_insn (gen_blockage ());
11413 /* We don't want to interpret this push insn as a register save,
11414 only as a stack adjustment. The real copy of the register as
11415 a save will be done later, if needed. */
11416 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11417 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
11418 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11419 RTX_FRAME_RELATED_P (insn) = 1;
11422 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
11423 DRAP is needed and stack realignment is really needed after reload. */
11424 if (stack_realign_drap)
11426 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11428 /* Only need to push parameter pointer reg if it is caller saved. */
11429 if (!call_used_regs[REGNO (crtl->drap_reg)])
11431 /* Push arg pointer reg */
11432 insn = emit_insn (gen_push (crtl->drap_reg));
11433 RTX_FRAME_RELATED_P (insn) = 1;
11436 /* Grab the argument pointer. */
11437 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11438 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11439 RTX_FRAME_RELATED_P (insn) = 1;
11440 m->fs.cfa_reg = crtl->drap_reg;
11441 m->fs.cfa_offset = 0;
11443 /* Align the stack. */
11444 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11445 stack_pointer_rtx,
11446 GEN_INT (-align_bytes)));
11447 RTX_FRAME_RELATED_P (insn) = 1;
11449 /* Replicate the return address on the stack so that the return
11450 address can be reached via the (argp - 1) slot. This is needed
11451 to implement the RETURN_ADDR_RTX macro and intrinsic functions
11452 such as expand_builtin_return_addr. */
11453 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11454 t = gen_frame_mem (word_mode, t);
11455 insn = emit_insn (gen_push (t));
11456 RTX_FRAME_RELATED_P (insn) = 1;
11458 /* For the purposes of frame and register save area addressing,
11459 we've started over with a new frame. */
11460 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11461 m->fs.realigned = true;
11464 int_registers_saved = (frame.nregs == 0);
11465 sse_registers_saved = (frame.nsseregs == 0);
11467 if (frame_pointer_needed && !m->fs.fp_valid)
11469 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11470 slower on all targets. Also sdb doesn't like it. */
11471 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11472 RTX_FRAME_RELATED_P (insn) = 1;
11474 /* Push registers now, before setting the frame pointer
11475 on SEH target. */
11476 if (!int_registers_saved
11477 && TARGET_SEH
11478 && !frame.save_regs_using_mov)
11480 ix86_emit_save_regs ();
11481 int_registers_saved = true;
11482 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11485 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11487 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11488 RTX_FRAME_RELATED_P (insn) = 1;
11490 if (m->fs.cfa_reg == stack_pointer_rtx)
11491 m->fs.cfa_reg = hard_frame_pointer_rtx;
11492 m->fs.fp_offset = m->fs.sp_offset;
11493 m->fs.fp_valid = true;
11497 if (!int_registers_saved)
11499 /* If saving registers via PUSH, do so now. */
11500 if (!frame.save_regs_using_mov)
11502 ix86_emit_save_regs ();
11503 int_registers_saved = true;
11504 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11507 /* When using the red zone we may start saving registers before allocating
11508 the stack frame, saving one cycle of the prologue. However, avoid
11509 doing this if we have to probe the stack; at least on x86_64 the
11510 stack probe can turn into a call that clobbers a red zone location. */
11511 else if (ix86_using_red_zone ()
11512 && (! TARGET_STACK_PROBE
11513 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11515 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11516 int_registers_saved = true;
11520 if (stack_realign_fp)
11522 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11523 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11525 /* The computation of the size of the re-aligned stack frame means
11526 that we must allocate the size of the register save area before
11527 performing the actual alignment. Otherwise we cannot guarantee
11528 that there's enough storage above the realignment point. */
11529 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11530 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11531 GEN_INT (m->fs.sp_offset
11532 - frame.sse_reg_save_offset),
11533 -1, false);
11535 /* Align the stack. */
11536 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11537 stack_pointer_rtx,
11538 GEN_INT (-align_bytes)));
11540 /* For the purposes of register save area addressing, the stack
11541 pointer is no longer valid. As for the value of sp_offset,
11542 see ix86_compute_frame_layout, which we need to match in order
11543 to pass verification of stack_pointer_offset at the end. */
11544 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11545 m->fs.sp_valid = false;
11548 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11550 if (flag_stack_usage_info)
11552 /* We start to count from ARG_POINTER. */
11553 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11555 /* If it was realigned, take into account the fake frame. */
11556 if (stack_realign_drap)
11558 if (ix86_static_chain_on_stack)
11559 stack_size += UNITS_PER_WORD;
11561 if (!call_used_regs[REGNO (crtl->drap_reg)])
11562 stack_size += UNITS_PER_WORD;
11564 /* This over-estimates by 1 minimal-stack-alignment-unit but
11565 mitigates that by counting in the new return address slot. */
11566 current_function_dynamic_stack_size
11567 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11570 current_function_static_stack_size = stack_size;
11573 /* On SEH target with very large frame size, allocate an area to save
11574 SSE registers (as the very large allocation won't be described). */
11575 if (TARGET_SEH
11576 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11577 && !sse_registers_saved)
11579 HOST_WIDE_INT sse_size =
11580 frame.sse_reg_save_offset - frame.reg_save_offset;
11582 gcc_assert (int_registers_saved);
11584 /* No need to do stack checking as the area will be immediately
11585 written. */
11586 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11587 GEN_INT (-sse_size), -1,
11588 m->fs.cfa_reg == stack_pointer_rtx);
11589 allocate -= sse_size;
11590 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11591 sse_registers_saved = true;
11594 /* The stack has already been decremented by the instruction calling us
11595 so probe if the size is non-negative to preserve the protection area. */
11596 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11598 /* We expect the registers to be saved when probes are used. */
11599 gcc_assert (int_registers_saved);
11601 if (STACK_CHECK_MOVING_SP)
11603 if (!(crtl->is_leaf && !cfun->calls_alloca
11604 && allocate <= PROBE_INTERVAL))
11606 ix86_adjust_stack_and_probe (allocate);
11607 allocate = 0;
11610 else
11612 HOST_WIDE_INT size = allocate;
11614 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11615 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11617 if (TARGET_STACK_PROBE)
11619 if (crtl->is_leaf && !cfun->calls_alloca)
11621 if (size > PROBE_INTERVAL)
11622 ix86_emit_probe_stack_range (0, size);
11624 else
11625 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11627 else
11629 if (crtl->is_leaf && !cfun->calls_alloca)
11631 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11632 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11633 size - STACK_CHECK_PROTECT);
11635 else
11636 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11641 if (allocate == 0)
11643 else if (!ix86_target_stack_probe ()
11644 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11646 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11647 GEN_INT (-allocate), -1,
11648 m->fs.cfa_reg == stack_pointer_rtx);
11650 else
11652 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11653 rtx r10 = NULL;
11654 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11655 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11656 bool eax_live = ix86_eax_live_at_start_p ();
11657 bool r10_live = false;
11659 if (TARGET_64BIT)
11660 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11662 if (eax_live)
11664 insn = emit_insn (gen_push (eax));
11665 allocate -= UNITS_PER_WORD;
11666 /* Note that SEH directives need to continue tracking the stack
11667 pointer even after the frame pointer has been set up. */
11668 if (sp_is_cfa_reg || TARGET_SEH)
11670 if (sp_is_cfa_reg)
11671 m->fs.cfa_offset += UNITS_PER_WORD;
11672 RTX_FRAME_RELATED_P (insn) = 1;
11673 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11674 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11675 plus_constant (Pmode, stack_pointer_rtx,
11676 -UNITS_PER_WORD)));
11680 if (r10_live)
11682 r10 = gen_rtx_REG (Pmode, R10_REG);
11683 insn = emit_insn (gen_push (r10));
11684 allocate -= UNITS_PER_WORD;
11685 if (sp_is_cfa_reg || TARGET_SEH)
11687 if (sp_is_cfa_reg)
11688 m->fs.cfa_offset += UNITS_PER_WORD;
11689 RTX_FRAME_RELATED_P (insn) = 1;
11690 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11691 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11692 plus_constant (Pmode, stack_pointer_rtx,
11693 -UNITS_PER_WORD)));
11697 emit_move_insn (eax, GEN_INT (allocate));
11698 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11700 /* Use the fact that AX still contains ALLOCATE. */
11701 adjust_stack_insn = (Pmode == DImode
11702 ? gen_pro_epilogue_adjust_stack_di_sub
11703 : gen_pro_epilogue_adjust_stack_si_sub);
11705 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11706 stack_pointer_rtx, eax));
11708 if (sp_is_cfa_reg || TARGET_SEH)
11710 if (sp_is_cfa_reg)
11711 m->fs.cfa_offset += allocate;
11712 RTX_FRAME_RELATED_P (insn) = 1;
11713 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11714 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11715 plus_constant (Pmode, stack_pointer_rtx,
11716 -allocate)));
11718 m->fs.sp_offset += allocate;
11720 /* Use stack_pointer_rtx for relative addressing so that code
11721 works for realigned stack, too. */
11722 if (r10_live && eax_live)
11724 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11725 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11726 gen_frame_mem (word_mode, t));
11727 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11728 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11729 gen_frame_mem (word_mode, t));
11731 else if (eax_live || r10_live)
11733 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11734 emit_move_insn (gen_rtx_REG (word_mode,
11735 (eax_live ? AX_REG : R10_REG)),
11736 gen_frame_mem (word_mode, t));
11739 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11741 /* If we haven't already set up the frame pointer, do so now. */
11742 if (frame_pointer_needed && !m->fs.fp_valid)
11744 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11745 GEN_INT (frame.stack_pointer_offset
11746 - frame.hard_frame_pointer_offset));
11747 insn = emit_insn (insn);
11748 RTX_FRAME_RELATED_P (insn) = 1;
11749 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11751 if (m->fs.cfa_reg == stack_pointer_rtx)
11752 m->fs.cfa_reg = hard_frame_pointer_rtx;
11753 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11754 m->fs.fp_valid = true;
11757 if (!int_registers_saved)
11758 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11759 if (!sse_registers_saved)
11760 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11762 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
11763 in the prologue. */
11764 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11766 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11767 insn = emit_insn (gen_set_got (pic));
11768 RTX_FRAME_RELATED_P (insn) = 1;
11769 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11770 emit_insn (gen_prologue_use (pic));
11771 /* Delete the already emitted SET_GOT if it exists and is allocated to
11772 REAL_PIC_OFFSET_TABLE_REGNUM. */
11773 ix86_elim_entry_set_got (pic);
11776 if (crtl->drap_reg && !crtl->stack_realign_needed)
11778 /* vDRAP is set up, but after reload it turns out stack realignment
11779 isn't necessary; here we emit prologue code to set up DRAP
11780 without the stack realignment adjustment. */
11781 t = choose_baseaddr (0);
11782 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11785 /* Prevent instructions from being scheduled into register save push
11786 sequence when access to the redzone area is done through frame pointer.
11787 The offset between the frame pointer and the stack pointer is calculated
11788 relative to the value of the stack pointer at the end of the function
11789 prologue, and moving instructions that access redzone area via frame
11790 pointer inside push sequence violates this assumption. */
11791 if (frame_pointer_needed && frame.red_zone_size)
11792 emit_insn (gen_memory_blockage ());
11794 /* Emit cld instruction if stringops are used in the function. */
11795 if (TARGET_CLD && ix86_current_function_needs_cld)
11796 emit_insn (gen_cld ());
11798 /* SEH requires that the prologue end within 256 bytes of the start of
11799 the function. Prevent instruction schedules that would extend that.
11800 Further, prevent alloca modifications to the stack pointer from being
11801 combined with prologue modifications. */
11802 if (TARGET_SEH)
11803 emit_insn (gen_prologue_use (stack_pointer_rtx));
11806 /* Emit code to restore REG using a POP insn. */
11808 static void
11809 ix86_emit_restore_reg_using_pop (rtx reg)
11811 struct machine_function *m = cfun->machine;
11812 rtx_insn *insn = emit_insn (gen_pop (reg));
11814 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11815 m->fs.sp_offset -= UNITS_PER_WORD;
11817 if (m->fs.cfa_reg == crtl->drap_reg
11818 && REGNO (reg) == REGNO (crtl->drap_reg))
11820 /* Previously we'd represented the CFA as an expression
11821 like *(%ebp - 8). We've just popped that value from
11822 the stack, which means we need to reset the CFA to
11823 the drap register. This will remain until we restore
11824 the stack pointer. */
11825 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11826 RTX_FRAME_RELATED_P (insn) = 1;
11828 /* This means that the DRAP register is valid for addressing too. */
11829 m->fs.drap_valid = true;
11830 return;
11833 if (m->fs.cfa_reg == stack_pointer_rtx)
11835 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11836 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11837 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11838 RTX_FRAME_RELATED_P (insn) = 1;
11840 m->fs.cfa_offset -= UNITS_PER_WORD;
11843 /* When the frame pointer is the CFA, and we pop it, we are
11844 swapping back to the stack pointer as the CFA. This happens
11845 for stack frames that don't allocate other data, so we assume
11846 the stack pointer is now pointing at the return address, i.e.
11847 the function entry state, which makes the offset be 1 word. */
11848 if (reg == hard_frame_pointer_rtx)
11850 m->fs.fp_valid = false;
11851 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11853 m->fs.cfa_reg = stack_pointer_rtx;
11854 m->fs.cfa_offset -= UNITS_PER_WORD;
11856 add_reg_note (insn, REG_CFA_DEF_CFA,
11857 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11858 GEN_INT (m->fs.cfa_offset)));
11859 RTX_FRAME_RELATED_P (insn) = 1;
11864 /* Emit code to restore saved registers using POP insns. */
11866 static void
11867 ix86_emit_restore_regs_using_pop (void)
11869 unsigned int regno;
11871 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11872 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11873 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11876 /* Emit code and notes for the LEAVE instruction. */
11878 static void
11879 ix86_emit_leave (void)
11881 struct machine_function *m = cfun->machine;
11882 rtx_insn *insn = emit_insn (ix86_gen_leave ());
11884 ix86_add_queued_cfa_restore_notes (insn);
11886 gcc_assert (m->fs.fp_valid);
11887 m->fs.sp_valid = true;
11888 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11889 m->fs.fp_valid = false;
11891 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11893 m->fs.cfa_reg = stack_pointer_rtx;
11894 m->fs.cfa_offset = m->fs.sp_offset;
11896 add_reg_note (insn, REG_CFA_DEF_CFA,
11897 plus_constant (Pmode, stack_pointer_rtx,
11898 m->fs.sp_offset));
11899 RTX_FRAME_RELATED_P (insn) = 1;
11901 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11902 m->fs.fp_offset);
11905 /* Emit code to restore saved registers using MOV insns.
11906 First register is restored from CFA - CFA_OFFSET. */
11907 static void
11908 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11909 bool maybe_eh_return)
11911 struct machine_function *m = cfun->machine;
11912 unsigned int regno;
11914 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11915 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11917 rtx reg = gen_rtx_REG (word_mode, regno);
11918 rtx mem;
11919 rtx_insn *insn;
11921 mem = choose_baseaddr (cfa_offset);
11922 mem = gen_frame_mem (word_mode, mem);
11923 insn = emit_move_insn (reg, mem);
11925 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11927 /* Previously we'd represented the CFA as an expression
11928 like *(%ebp - 8). We've just reloaded that value from
11929 the stack, which means we need to reset the CFA to
11930 the drap register. This will remain until we restore
11931 the stack pointer. */
11932 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11933 RTX_FRAME_RELATED_P (insn) = 1;
11935 /* This means that the DRAP register is valid for addressing. */
11936 m->fs.drap_valid = true;
11938 else
11939 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
11941 cfa_offset -= UNITS_PER_WORD;
11945 /* Emit code to restore saved SSE registers using MOV insns.
11946 First register is restored from CFA - CFA_OFFSET. */
11947 static void
11948 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11949 bool maybe_eh_return)
11951 unsigned int regno;
11953 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11954 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11956 rtx reg = gen_rtx_REG (V4SFmode, regno);
11957 rtx mem;
11959 mem = choose_baseaddr (cfa_offset);
11960 mem = gen_rtx_MEM (V4SFmode, mem);
11961 set_mem_align (mem, 128);
11962 emit_move_insn (reg, mem);
11964 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
11966 cfa_offset -= 16;
11970 /* Restore function stack, frame, and registers. */
11972 void
11973 ix86_expand_epilogue (int style)
11975 struct machine_function *m = cfun->machine;
11976 struct machine_frame_state frame_state_save = m->fs;
11977 struct ix86_frame frame;
11978 bool restore_regs_via_mov;
11979 bool using_drap;
11981 ix86_finalize_stack_realign_flags ();
11982 ix86_compute_frame_layout (&frame);
11984 m->fs.sp_valid = (!frame_pointer_needed
11985 || (crtl->sp_is_unchanging
11986 && !stack_realign_fp));
11987 gcc_assert (!m->fs.sp_valid
11988 || m->fs.sp_offset == frame.stack_pointer_offset);
11990 /* The FP must be valid if the frame pointer is present. */
11991 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11992 gcc_assert (!m->fs.fp_valid
11993 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11995 /* We must have *some* valid pointer to the stack frame. */
11996 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11998 /* The DRAP is never valid at this point. */
11999 gcc_assert (!m->fs.drap_valid);
12001 /* See the comment about red zone and frame
12002 pointer usage in ix86_expand_prologue. */
12003 if (frame_pointer_needed && frame.red_zone_size)
12004 emit_insn (gen_memory_blockage ());
12006 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
12007 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
12009 /* Determine the CFA offset of the end of the red-zone. */
12010 m->fs.red_zone_offset = 0;
12011 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
12013 /* The red-zone begins below the return address. */
12014 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
12016 /* When the register save area is in the aligned portion of
12017 the stack, determine the maximum runtime displacement that
12018 matches up with the aligned frame. */
12019 if (stack_realign_drap)
12020 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
12021 + UNITS_PER_WORD);
12024 /* Special care must be taken for the normal return case of a function
12025 using eh_return: the eax and edx registers are marked as saved, but
12026 not restored along this path. Adjust the save location to match. */
12027 if (crtl->calls_eh_return && style != 2)
12028 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
12030 /* EH_RETURN requires the use of moves to function properly. */
12031 if (crtl->calls_eh_return)
12032 restore_regs_via_mov = true;
12033 /* SEH requires the use of pops to identify the epilogue. */
12034 else if (TARGET_SEH)
12035 restore_regs_via_mov = false;
12036 /* If we're only restoring one register and sp is not valid, then
12037 use a move instruction to restore the register, since it's
12038 less work than reloading sp and popping the register. */
12039 else if (!m->fs.sp_valid && frame.nregs <= 1)
12040 restore_regs_via_mov = true;
12041 else if (TARGET_EPILOGUE_USING_MOVE
12042 && cfun->machine->use_fast_prologue_epilogue
12043 && (frame.nregs > 1
12044 || m->fs.sp_offset != frame.reg_save_offset))
12045 restore_regs_via_mov = true;
12046 else if (frame_pointer_needed
12047 && !frame.nregs
12048 && m->fs.sp_offset != frame.reg_save_offset)
12049 restore_regs_via_mov = true;
12050 else if (frame_pointer_needed
12051 && TARGET_USE_LEAVE
12052 && cfun->machine->use_fast_prologue_epilogue
12053 && frame.nregs == 1)
12054 restore_regs_via_mov = true;
12055 else
12056 restore_regs_via_mov = false;
12058 if (restore_regs_via_mov || frame.nsseregs)
12060 /* Ensure that the entire register save area is addressable via
12061 the stack pointer, if we will restore via sp. */
12062 if (TARGET_64BIT
12063 && m->fs.sp_offset > 0x7fffffff
12064 && !(m->fs.fp_valid || m->fs.drap_valid)
12065 && (frame.nsseregs + frame.nregs) != 0)
12067 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12068 GEN_INT (m->fs.sp_offset
12069 - frame.sse_reg_save_offset),
12070 style,
12071 m->fs.cfa_reg == stack_pointer_rtx);
12075 /* If there are any SSE registers to restore, then we have to do it
12076 via moves, since there's obviously no pop for SSE regs. */
12077 if (frame.nsseregs)
12078 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
12079 style == 2);
12081 if (restore_regs_via_mov)
12083 rtx t;
12085 if (frame.nregs)
12086 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
12088 /* eh_return epilogues need %ecx added to the stack pointer. */
12089 if (style == 2)
12091 rtx sa = EH_RETURN_STACKADJ_RTX;
12092 rtx_insn *insn;
12094 /* Stack align doesn't work with eh_return. */
12095 gcc_assert (!stack_realign_drap);
12096 /* Neither do regparm nested functions. */
12097 gcc_assert (!ix86_static_chain_on_stack);
12099 if (frame_pointer_needed)
12101 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
12102 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
12103 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
12105 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
12106 insn = emit_move_insn (hard_frame_pointer_rtx, t);
12108 /* Note that we use SA as a temporary CFA, as the return
12109 address is at the proper place relative to it. We
12110 pretend this happens at the FP restore insn because
12111 prior to this insn the FP would be stored at the wrong
12112 offset relative to SA, and after this insn we have no
12113 other reasonable register to use for the CFA. We don't
12114 bother resetting the CFA to the SP for the duration of
12115 the return insn. */
12116 add_reg_note (insn, REG_CFA_DEF_CFA,
12117 plus_constant (Pmode, sa, UNITS_PER_WORD));
12118 ix86_add_queued_cfa_restore_notes (insn);
12119 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
12120 RTX_FRAME_RELATED_P (insn) = 1;
12122 m->fs.cfa_reg = sa;
12123 m->fs.cfa_offset = UNITS_PER_WORD;
12124 m->fs.fp_valid = false;
12126 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
12127 const0_rtx, style, false);
12129 else
12131 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
12132 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
12133 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
12134 ix86_add_queued_cfa_restore_notes (insn);
12136 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12137 if (m->fs.cfa_offset != UNITS_PER_WORD)
12139 m->fs.cfa_offset = UNITS_PER_WORD;
12140 add_reg_note (insn, REG_CFA_DEF_CFA,
12141 plus_constant (Pmode, stack_pointer_rtx,
12142 UNITS_PER_WORD));
12143 RTX_FRAME_RELATED_P (insn) = 1;
12146 m->fs.sp_offset = UNITS_PER_WORD;
12147 m->fs.sp_valid = true;
12150 else
12152 /* SEH requires that the function end with (1) a stack adjustment
12153 if necessary, (2) a sequence of pops, and (3) a return or
12154 jump instruction. Prevent insns from the function body from
12155 being scheduled into this sequence. */
12156 if (TARGET_SEH)
12158 /* Prevent a catch region from being adjacent to the standard
12159 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
12160 several other flags that would be interesting to test are
12161 set up yet. */
12162 if (flag_non_call_exceptions)
12163 emit_insn (gen_nops (const1_rtx));
12164 else
12165 emit_insn (gen_blockage ());
12168 /* First step is to deallocate the stack frame so that we can
12169 pop the registers. Also do it on SEH target for very large
12170 frame as the emitted instructions aren't allowed by the ABI in
12171 epilogues. */
12172 if (!m->fs.sp_valid
12173 || (TARGET_SEH
12174 && (m->fs.sp_offset - frame.reg_save_offset
12175 >= SEH_MAX_FRAME_SIZE)))
12177 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12178 GEN_INT (m->fs.fp_offset
12179 - frame.reg_save_offset),
12180 style, false);
12182 else if (m->fs.sp_offset != frame.reg_save_offset)
12184 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12185 GEN_INT (m->fs.sp_offset
12186 - frame.reg_save_offset),
12187 style,
12188 m->fs.cfa_reg == stack_pointer_rtx);
12191 ix86_emit_restore_regs_using_pop ();
12194 /* If we used a stack pointer and haven't already got rid of it,
12195 then do so now. */
12196 if (m->fs.fp_valid)
12198 /* If the stack pointer is valid and pointing at the frame
12199 pointer store address, then we only need a pop. */
12200 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12201 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12202 /* Leave results in shorter dependency chains on CPUs that are
12203 able to grok it fast. */
12204 else if (TARGET_USE_LEAVE
12205 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12206 || !cfun->machine->use_fast_prologue_epilogue)
12207 ix86_emit_leave ();
12208 else
12210 pro_epilogue_adjust_stack (stack_pointer_rtx,
12211 hard_frame_pointer_rtx,
12212 const0_rtx, style, !using_drap);
12213 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12217 if (using_drap)
12219 int param_ptr_offset = UNITS_PER_WORD;
12220 rtx_insn *insn;
12222 gcc_assert (stack_realign_drap);
12224 if (ix86_static_chain_on_stack)
12225 param_ptr_offset += UNITS_PER_WORD;
12226 if (!call_used_regs[REGNO (crtl->drap_reg)])
12227 param_ptr_offset += UNITS_PER_WORD;
12229 insn = emit_insn (gen_rtx_SET
12230 (VOIDmode, stack_pointer_rtx,
12231 gen_rtx_PLUS (Pmode,
12232 crtl->drap_reg,
12233 GEN_INT (-param_ptr_offset))));
12234 m->fs.cfa_reg = stack_pointer_rtx;
12235 m->fs.cfa_offset = param_ptr_offset;
12236 m->fs.sp_offset = param_ptr_offset;
12237 m->fs.realigned = false;
12239 add_reg_note (insn, REG_CFA_DEF_CFA,
12240 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12241 GEN_INT (param_ptr_offset)));
12242 RTX_FRAME_RELATED_P (insn) = 1;
12244 if (!call_used_regs[REGNO (crtl->drap_reg)])
12245 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12248 /* At this point the stack pointer must be valid, and we must have
12249 restored all of the registers. We may not have deallocated the
12250 entire stack frame. We've delayed this until now because it may
12251 be possible to merge the local stack deallocation with the
12252 deallocation forced by ix86_static_chain_on_stack. */
12253 gcc_assert (m->fs.sp_valid);
12254 gcc_assert (!m->fs.fp_valid);
12255 gcc_assert (!m->fs.realigned);
12256 if (m->fs.sp_offset != UNITS_PER_WORD)
12258 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12259 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12260 style, true);
12262 else
12263 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12265 /* Sibcall epilogues don't want a return instruction. */
12266 if (style == 0)
12268 m->fs = frame_state_save;
12269 return;
12272 if (crtl->args.pops_args && crtl->args.size)
12274 rtx popc = GEN_INT (crtl->args.pops_args);
12276 /* i386 can only pop 64K bytes. If asked to pop more, pop return
12277 address, do explicit add, and jump indirectly to the caller. */
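/* Illustratively (a sketch, not the literal emitted assembly), the
   large-pops_args path below boils down to:

       popl  %ecx                  ; fetch the return address
       addl  $pops_args, %esp      ; drop the caller-pushed arguments
       jmp   *%ecx                 ; return via an indirect jump

   instead of the usual "ret $pops_args", whose immediate is only 16
   bits wide.  */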
12279 if (crtl->args.pops_args >= 65536)
12281 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12282 rtx_insn *insn;
12284 /* There is no "pascal" calling convention in any 64bit ABI. */
12285 gcc_assert (!TARGET_64BIT);
12287 insn = emit_insn (gen_pop (ecx));
12288 m->fs.cfa_offset -= UNITS_PER_WORD;
12289 m->fs.sp_offset -= UNITS_PER_WORD;
12291 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12292 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
12293 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12294 add_reg_note (insn, REG_CFA_REGISTER,
12295 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
12296 RTX_FRAME_RELATED_P (insn) = 1;
12298 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12299 popc, -1, true);
12300 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12302 else
12303 emit_jump_insn (gen_simple_return_pop_internal (popc));
12305 else
12306 emit_jump_insn (gen_simple_return_internal ());
12308 /* Restore the state back to the state from the prologue,
12309 so that it's correct for the next epilogue. */
12310 m->fs = frame_state_save;
12313 /* Reset from the function's potential modifications. */
12315 static void
12316 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12318 if (pic_offset_table_rtx
12319 && !ix86_use_pseudo_pic_reg ())
12320 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12321 #if TARGET_MACHO
12322 /* Mach-O doesn't support labels at the end of objects, so if
12323 it looks like we might want one, insert a NOP. */
12325 rtx_insn *insn = get_last_insn ();
12326 rtx_insn *deleted_debug_label = NULL;
12327 while (insn
12328 && NOTE_P (insn)
12329 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12331 /* For NOTE_INSN_DELETED_DEBUG_LABEL notes, don't insert a nop;
12332 instead only set their CODE_LABEL_NUMBER to -1, otherwise
12333 there would be code generation differences
12334 between -g and -g0. */
12335 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12336 deleted_debug_label = insn;
12337 insn = PREV_INSN (insn);
12339 if (insn
12340 && (LABEL_P (insn)
12341 || (NOTE_P (insn)
12342 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12343 fputs ("\tnop\n", file);
12344 else if (deleted_debug_label)
12345 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12346 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12347 CODE_LABEL_NUMBER (insn) = -1;
12349 #endif
12353 /* Return a scratch register to use in the split stack prologue. The
12354 split stack prologue is used for -fsplit-stack. These are the first
12355 instructions in the function, even before the regular prologue.
12356 The scratch register can be any caller-saved register which is not
12357 used for parameters or for the static chain. */
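/* In summary, the cases handled below come out as:
     64-bit                  -> %r11
     32-bit, fastcall        -> %eax (nested functions unsupported)
     32-bit, thiscall        -> %edx, or %eax with a static chain
     32-bit, regparm < 3     -> %ecx, or %edx with a static chain
                                (unsupported if regparm >= 2)
     32-bit, regparm == 3    -> unsupported.  */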
12359 static unsigned int
12360 split_stack_prologue_scratch_regno (void)
12362 if (TARGET_64BIT)
12363 return R11_REG;
12364 else
12366 bool is_fastcall, is_thiscall;
12367 int regparm;
12369 is_fastcall = (lookup_attribute ("fastcall",
12370 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12371 != NULL);
12372 is_thiscall = (lookup_attribute ("thiscall",
12373 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12374 != NULL);
12375 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12377 if (is_fastcall)
12379 if (DECL_STATIC_CHAIN (cfun->decl))
12381 sorry ("-fsplit-stack does not support fastcall with "
12382 "nested function");
12383 return INVALID_REGNUM;
12385 return AX_REG;
12387 else if (is_thiscall)
12389 if (!DECL_STATIC_CHAIN (cfun->decl))
12390 return DX_REG;
12391 return AX_REG;
12393 else if (regparm < 3)
12395 if (!DECL_STATIC_CHAIN (cfun->decl))
12396 return CX_REG;
12397 else
12399 if (regparm >= 2)
12401 sorry ("-fsplit-stack does not support 2 register "
12402 "parameters for a nested function");
12403 return INVALID_REGNUM;
12405 return DX_REG;
12408 else
12410 /* FIXME: We could make this work by pushing a register
12411 around the addition and comparison. */
12412 sorry ("-fsplit-stack does not support 3 register parameters");
12413 return INVALID_REGNUM;
12418 /* A SYMBOL_REF for the function which allocates new stackspace for
12419 -fsplit-stack. */
12421 static GTY(()) rtx split_stack_fn;
12423 /* A SYMBOL_REF for the __morestack_large_model function, used with
12424 the large code model. */
12426 static GTY(()) rtx split_stack_fn_large;
12428 /* Handle -fsplit-stack. These are the first instructions in the
12429 function, even before the regular prologue. */
12431 void
12432 ix86_expand_split_stack_prologue (void)
12434 struct ix86_frame frame;
12435 HOST_WIDE_INT allocate;
12436 unsigned HOST_WIDE_INT args_size;
12437 rtx_code_label *label;
12438 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12439 rtx scratch_reg = NULL_RTX;
12440 rtx_code_label *varargs_label = NULL;
12441 rtx fn;
12443 gcc_assert (flag_split_stack && reload_completed);
12445 ix86_finalize_stack_realign_flags ();
12446 ix86_compute_frame_layout (&frame);
12447 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12449 /* This is the label we will branch to if we have enough stack
12450 space. We expect the basic block reordering pass to reverse this
12451 branch if optimizing, so that we branch in the unlikely case. */
12452 label = gen_label_rtx ();
12454 /* We need to compare the stack pointer minus the frame size with
12455 the stack boundary in the TCB. The stack boundary always gives
12456 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12457 can compare directly. Otherwise we need to do an addition. */
12459 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12460 UNSPEC_STACK_CHECK);
12461 limit = gen_rtx_CONST (Pmode, limit);
12462 limit = gen_rtx_MEM (Pmode, limit);
12463 if (allocate < SPLIT_STACK_AVAILABLE)
12464 current = stack_pointer_rtx;
12465 else
12467 unsigned int scratch_regno;
12468 rtx offset;
12470 /* We need a scratch register to hold the stack pointer minus
12471 the required frame size. Since this is the very start of the
12472 function, the scratch register can be any caller-saved
12473 register which is not used for parameters. */
12474 offset = GEN_INT (- allocate);
12475 scratch_regno = split_stack_prologue_scratch_regno ();
12476 if (scratch_regno == INVALID_REGNUM)
12477 return;
12478 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12479 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12481 /* We don't use ix86_gen_add3 in this case because it will
12482 want to split to lea, but when not optimizing the insn
12483 will not be split after this point. */
12484 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12485 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12486 offset)));
12488 else
12490 emit_move_insn (scratch_reg, offset);
12491 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12492 stack_pointer_rtx));
12494 current = scratch_reg;
12497 ix86_expand_branch (GEU, current, limit, label);
12498 jump_insn = get_last_insn ();
12499 JUMP_LABEL (jump_insn) = label;
12501 /* Mark the jump as very likely to be taken. */
12502 add_int_reg_note (jump_insn, REG_BR_PROB,
12503 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12505 if (split_stack_fn == NULL_RTX)
12507 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12508 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12510 fn = split_stack_fn;
12512 /* Get more stack space. We pass in the desired stack space and the
12513 size of the arguments to copy to the new stack. In 32-bit mode
12514 we push the parameters; __morestack will return on a new stack
12515 anyhow. In 64-bit mode we pass the parameters in r10 and
12516 r11. */
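/* Roughly, the call emitted below takes one of these forms (a sketch
   only; the exact instruction patterns differ):

     32-bit:  pushl $args_size
              pushl $allocate
              call  __morestack

     64-bit:  movq  $allocate, %r10
              movq  $args_size, %r11
              call  __morestack  */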
12517 allocate_rtx = GEN_INT (allocate);
12518 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12519 call_fusage = NULL_RTX;
12520 if (TARGET_64BIT)
12522 rtx reg10, reg11;
12524 reg10 = gen_rtx_REG (Pmode, R10_REG);
12525 reg11 = gen_rtx_REG (Pmode, R11_REG);
12527 /* If this function uses a static chain, it will be in %r10.
12528 Preserve it across the call to __morestack. */
12529 if (DECL_STATIC_CHAIN (cfun->decl))
12531 rtx rax;
12533 rax = gen_rtx_REG (word_mode, AX_REG);
12534 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12535 use_reg (&call_fusage, rax);
12538 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12539 && !TARGET_PECOFF)
12541 HOST_WIDE_INT argval;
12543 gcc_assert (Pmode == DImode);
12544 /* When using the large model we need to load the address
12545 into a register, and we've run out of registers. So we
12546 switch to a different calling convention, and we call a
12547 different function: __morestack_large. We pass the
12548 argument size in the upper 32 bits of r10 and pass the
12549 frame size in the lower 32 bits. */
12550 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12551 gcc_assert ((args_size & 0xffffffff) == args_size);
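/* Worked example of the packing computed for "argval" further below:
   with args_size == 0x18 and allocate == 0x4000,
   argval == (0x18 << 32) + 0x4000 == 0x0000001800004000, so
   __morestack_large_model finds the argument size in the upper 32 bits
   of %r10 and the frame size in the lower 32 bits.  */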
12553 if (split_stack_fn_large == NULL_RTX)
12555 split_stack_fn_large =
12556 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12557 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12559 if (ix86_cmodel == CM_LARGE_PIC)
12561 rtx_code_label *label;
12562 rtx x;
12564 label = gen_label_rtx ();
12565 emit_label (label);
12566 LABEL_PRESERVE_P (label) = 1;
12567 emit_insn (gen_set_rip_rex64 (reg10, label));
12568 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12569 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12570 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12571 UNSPEC_GOT);
12572 x = gen_rtx_CONST (Pmode, x);
12573 emit_move_insn (reg11, x);
12574 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12575 x = gen_const_mem (Pmode, x);
12576 emit_move_insn (reg11, x);
12578 else
12579 emit_move_insn (reg11, split_stack_fn_large);
12581 fn = reg11;
12583 argval = ((args_size << 16) << 16) + allocate;
12584 emit_move_insn (reg10, GEN_INT (argval));
12586 else
12588 emit_move_insn (reg10, allocate_rtx);
12589 emit_move_insn (reg11, GEN_INT (args_size));
12590 use_reg (&call_fusage, reg11);
12593 use_reg (&call_fusage, reg10);
12595 else
12597 emit_insn (gen_push (GEN_INT (args_size)));
12598 emit_insn (gen_push (allocate_rtx));
12600 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12601 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12602 NULL_RTX, false);
12603 add_function_usage_to (call_insn, call_fusage);
12605 /* In order to make call/return prediction work right, we now need
12606 to execute a return instruction. See
12607 libgcc/config/i386/morestack.S for the details on how this works.
12609 For flow purposes gcc must not see this as a return
12610 instruction--we need control flow to continue at the subsequent
12611 label. Therefore, we use an unspec. */
12612 gcc_assert (crtl->args.pops_args < 65536);
12613 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12615 /* If we are in 64-bit mode and this function uses a static chain,
12616 we saved %r10 in %rax before calling __morestack. */
12617 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12618 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12619 gen_rtx_REG (word_mode, AX_REG));
12621 /* If this function calls va_start, we need to store a pointer to
12622 the arguments on the old stack, because they may not have been
12623 all copied to the new stack. At this point the old stack can be
12624 found at the frame pointer value used by __morestack, because
12625 __morestack has set that up before calling back to us. Here we
12626 store that pointer in a scratch register, and in
12627 ix86_expand_prologue we store the scratch register in a stack
12628 slot. */
12629 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12631 unsigned int scratch_regno;
12632 rtx frame_reg;
12633 int words;
12635 scratch_regno = split_stack_prologue_scratch_regno ();
12636 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12637 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12639 /* 64-bit:
12640 fp -> old fp value
12641 return address within this function
12642 return address of caller of this function
12643 stack arguments
12644 So we add three words to get to the stack arguments.
12646 32-bit:
12647 fp -> old fp value
12648 return address within this function
12649 first argument to __morestack
12650 second argument to __morestack
12651 return address of caller of this function
12652 stack arguments
12653 So we add five words to get to the stack arguments.
12655 words = TARGET_64BIT ? 3 : 5;
12656 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12657 gen_rtx_PLUS (Pmode, frame_reg,
12658 GEN_INT (words * UNITS_PER_WORD))));
12660 varargs_label = gen_label_rtx ();
12661 emit_jump_insn (gen_jump (varargs_label));
12662 JUMP_LABEL (get_last_insn ()) = varargs_label;
12664 emit_barrier ();
12667 emit_label (label);
12668 LABEL_NUSES (label) = 1;
12670 /* If this function calls va_start, we now have to set the scratch
12671 register for the case where we do not call __morestack. In this
12672 case we need to set it based on the stack pointer. */
12673 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12675 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12676 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12677 GEN_INT (UNITS_PER_WORD))));
12679 emit_label (varargs_label);
12680 LABEL_NUSES (varargs_label) = 1;
12684 /* We may have to tell the dataflow pass that the split stack prologue
12685 is initializing a scratch register. */
12687 static void
12688 ix86_live_on_entry (bitmap regs)
12690 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12692 gcc_assert (flag_split_stack);
12693 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12697 /* Extract the parts of an RTL expression that is a valid memory address
12698 for an instruction. Return 0 if the structure of the address is
12699 grossly off. Return -1 if the address contains ASHIFT, so it is not
12700 strictly valid, but still used for computing the length of the lea instruction. */
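/* For example (an illustrative decomposition only): the address
     (plus (plus (reg %ebx) (mult (reg %ecx) (const_int 4))) (const_int 8))
   i.e. 8(%ebx,%ecx,4), decomposes into
     base = %ebx, index = %ecx, scale = 4, disp = (const_int 8),
   with seg = SEG_DEFAULT and a return value of 1.  */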
12702 int
12703 ix86_decompose_address (rtx addr, struct ix86_address *out)
12705 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12706 rtx base_reg, index_reg;
12707 HOST_WIDE_INT scale = 1;
12708 rtx scale_rtx = NULL_RTX;
12709 rtx tmp;
12710 int retval = 1;
12711 enum ix86_address_seg seg = SEG_DEFAULT;
12713 /* Allow zero-extended SImode addresses;
12714 they will be emitted with the addr32 prefix. */
12715 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12717 if (GET_CODE (addr) == ZERO_EXTEND
12718 && GET_MODE (XEXP (addr, 0)) == SImode)
12720 addr = XEXP (addr, 0);
12721 if (CONST_INT_P (addr))
12722 return 0;
12724 else if (GET_CODE (addr) == AND
12725 && const_32bit_mask (XEXP (addr, 1), DImode))
12727 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12728 if (addr == NULL_RTX)
12729 return 0;
12731 if (CONST_INT_P (addr))
12732 return 0;
12736 /* Allow SImode subregs of DImode addresses;
12737 they will be emitted with the addr32 prefix. */
12738 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12740 if (GET_CODE (addr) == SUBREG
12741 && GET_MODE (SUBREG_REG (addr)) == DImode)
12743 addr = SUBREG_REG (addr);
12744 if (CONST_INT_P (addr))
12745 return 0;
12749 if (REG_P (addr))
12750 base = addr;
12751 else if (GET_CODE (addr) == SUBREG)
12753 if (REG_P (SUBREG_REG (addr)))
12754 base = addr;
12755 else
12756 return 0;
12758 else if (GET_CODE (addr) == PLUS)
12760 rtx addends[4], op;
12761 int n = 0, i;
12763 op = addr;
12766 if (n >= 4)
12767 return 0;
12768 addends[n++] = XEXP (op, 1);
12769 op = XEXP (op, 0);
12771 while (GET_CODE (op) == PLUS);
12772 if (n >= 4)
12773 return 0;
12774 addends[n] = op;
12776 for (i = n; i >= 0; --i)
12778 op = addends[i];
12779 switch (GET_CODE (op))
12781 case MULT:
12782 if (index)
12783 return 0;
12784 index = XEXP (op, 0);
12785 scale_rtx = XEXP (op, 1);
12786 break;
12788 case ASHIFT:
12789 if (index)
12790 return 0;
12791 index = XEXP (op, 0);
12792 tmp = XEXP (op, 1);
12793 if (!CONST_INT_P (tmp))
12794 return 0;
12795 scale = INTVAL (tmp);
12796 if ((unsigned HOST_WIDE_INT) scale > 3)
12797 return 0;
12798 scale = 1 << scale;
12799 break;
12801 case ZERO_EXTEND:
12802 op = XEXP (op, 0);
12803 if (GET_CODE (op) != UNSPEC)
12804 return 0;
12805 /* FALLTHRU */
12807 case UNSPEC:
12808 if (XINT (op, 1) == UNSPEC_TP
12809 && TARGET_TLS_DIRECT_SEG_REFS
12810 && seg == SEG_DEFAULT)
12811 seg = DEFAULT_TLS_SEG_REG;
12812 else
12813 return 0;
12814 break;
12816 case SUBREG:
12817 if (!REG_P (SUBREG_REG (op)))
12818 return 0;
12819 /* FALLTHRU */
12821 case REG:
12822 if (!base)
12823 base = op;
12824 else if (!index)
12825 index = op;
12826 else
12827 return 0;
12828 break;
12830 case CONST:
12831 case CONST_INT:
12832 case SYMBOL_REF:
12833 case LABEL_REF:
12834 if (disp)
12835 return 0;
12836 disp = op;
12837 break;
12839 default:
12840 return 0;
12844 else if (GET_CODE (addr) == MULT)
12846 index = XEXP (addr, 0); /* index*scale */
12847 scale_rtx = XEXP (addr, 1);
12849 else if (GET_CODE (addr) == ASHIFT)
12851 /* We're called for lea too, which implements ashift on occasion. */
12852 index = XEXP (addr, 0);
12853 tmp = XEXP (addr, 1);
12854 if (!CONST_INT_P (tmp))
12855 return 0;
12856 scale = INTVAL (tmp);
12857 if ((unsigned HOST_WIDE_INT) scale > 3)
12858 return 0;
12859 scale = 1 << scale;
12860 retval = -1;
12862 else
12863 disp = addr; /* displacement */
12865 if (index)
12867 if (REG_P (index))
12869 else if (GET_CODE (index) == SUBREG
12870 && REG_P (SUBREG_REG (index)))
12872 else
12873 return 0;
12876 /* Extract the integral value of scale. */
12877 if (scale_rtx)
12879 if (!CONST_INT_P (scale_rtx))
12880 return 0;
12881 scale = INTVAL (scale_rtx);
12884 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12885 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12887 /* Avoid useless 0 displacement. */
12888 if (disp == const0_rtx && (base || index))
12889 disp = NULL_RTX;
12891 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12892 if (base_reg && index_reg && scale == 1
12893 && (index_reg == arg_pointer_rtx
12894 || index_reg == frame_pointer_rtx
12895 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12897 std::swap (base, index);
12898 std::swap (base_reg, index_reg);
12901 /* Special case: %ebp cannot be encoded as a base without a displacement.
12902 Similarly %r13. */
12903 if (!disp
12904 && base_reg
12905 && (base_reg == hard_frame_pointer_rtx
12906 || base_reg == frame_pointer_rtx
12907 || base_reg == arg_pointer_rtx
12908 || (REG_P (base_reg)
12909 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12910 || REGNO (base_reg) == R13_REG))))
12911 disp = const0_rtx;
12913 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
12914 Avoid this by transforming to [%esi+0].
12915 Reload calls address legitimization without cfun defined, so we need
12916 to test cfun for being non-NULL. */
12917 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12918 && base_reg && !index_reg && !disp
12919 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12920 disp = const0_rtx;
12922 /* Special case: encode reg+reg instead of reg*2. */
12923 if (!base && index && scale == 2)
12924 base = index, base_reg = index_reg, scale = 1;
12926 /* Special case: scaling cannot be encoded without base or displacement. */
12927 if (!base && !disp && index && scale != 1)
12928 disp = const0_rtx;
12930 out->base = base;
12931 out->index = index;
12932 out->disp = disp;
12933 out->scale = scale;
12934 out->seg = seg;
12936 return retval;
12939 /* Return cost of the memory address x.
12940 For i386, it is better to use a complex address than let gcc copy
12941 the address into a reg and make a new pseudo. But not if the address
12942 requires two regs - that would mean more pseudos with longer
12943 lifetimes. */
12944 static int
12945 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
12947 struct ix86_address parts;
12948 int cost = 1;
12949 int ok = ix86_decompose_address (x, &parts);
12951 gcc_assert (ok);
12953 if (parts.base && GET_CODE (parts.base) == SUBREG)
12954 parts.base = SUBREG_REG (parts.base);
12955 if (parts.index && GET_CODE (parts.index) == SUBREG)
12956 parts.index = SUBREG_REG (parts.index);
12958 /* Attempt to minimize number of registers in the address by increasing
12959 address cost for each used register. We don't increase address cost
12960 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
12961 is not invariant itself it most likely means that base or index is not
12962 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
12963 which is not profitable for x86. */
12964 if (parts.base
12965 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12966 && (current_pass->type == GIMPLE_PASS
12967 || !pic_offset_table_rtx
12968 || !REG_P (parts.base)
12969 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
12970 cost++;
12972 if (parts.index
12973 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12974 && (current_pass->type == GIMPLE_PASS
12975 || !pic_offset_table_rtx
12976 || !REG_P (parts.index)
12977 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
12978 cost++;
12980 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12981 since its predecode logic can't detect the length of instructions
12982 and decoding degenerates to the vector decoder. Increase the cost of such
12983 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
12984 to split such addresses or even refuse such addresses at all.
12986 Following addressing modes are affected:
12987 [base+scale*index]
12988 [scale*index+disp]
12989 [base+index]
12991 The first and last case may be avoidable by explicitly coding the zero in
12992 the memory address, but I don't have an AMD-K6 machine handy to check this
12993 theory. */
12995 if (TARGET_K6
12996 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12997 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12998 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12999 cost += 10;
13001 return cost;
13004 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
13005 this is used to form addresses to local data when -fPIC is in
13006 use. */
13008 static bool
13009 darwin_local_data_pic (rtx disp)
13011 return (GET_CODE (disp) == UNSPEC
13012 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
13015 /* Determine if a given RTX is a valid constant. We already know this
13016 satisfies CONSTANT_P. */
13018 static bool
13019 ix86_legitimate_constant_p (machine_mode, rtx x)
13021 /* Pointer bounds constants are not valid. */
13022 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
13023 return false;
13025 switch (GET_CODE (x))
13027 case CONST:
13028 x = XEXP (x, 0);
13030 if (GET_CODE (x) == PLUS)
13032 if (!CONST_INT_P (XEXP (x, 1)))
13033 return false;
13034 x = XEXP (x, 0);
13037 if (TARGET_MACHO && darwin_local_data_pic (x))
13038 return true;
13040 /* Only some unspecs are valid as "constants". */
13041 if (GET_CODE (x) == UNSPEC)
13042 switch (XINT (x, 1))
13044 case UNSPEC_GOT:
13045 case UNSPEC_GOTOFF:
13046 case UNSPEC_PLTOFF:
13047 return TARGET_64BIT;
13048 case UNSPEC_TPOFF:
13049 case UNSPEC_NTPOFF:
13050 x = XVECEXP (x, 0, 0);
13051 return (GET_CODE (x) == SYMBOL_REF
13052 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13053 case UNSPEC_DTPOFF:
13054 x = XVECEXP (x, 0, 0);
13055 return (GET_CODE (x) == SYMBOL_REF
13056 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
13057 default:
13058 return false;
13061 /* We must have drilled down to a symbol. */
13062 if (GET_CODE (x) == LABEL_REF)
13063 return true;
13064 if (GET_CODE (x) != SYMBOL_REF)
13065 return false;
13066 /* FALLTHRU */
13068 case SYMBOL_REF:
13069 /* TLS symbols are never valid. */
13070 if (SYMBOL_REF_TLS_MODEL (x))
13071 return false;
13073 /* DLLIMPORT symbols are never valid. */
13074 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13075 && SYMBOL_REF_DLLIMPORT_P (x))
13076 return false;
13078 #if TARGET_MACHO
13079 /* mdynamic-no-pic */
13080 if (MACHO_DYNAMIC_NO_PIC_P)
13081 return machopic_symbol_defined_p (x);
13082 #endif
13083 break;
13085 case CONST_WIDE_INT:
13086 if (!TARGET_64BIT && !standard_sse_constant_p (x))
13087 return false;
13088 break;
13090 case CONST_VECTOR:
13091 if (!standard_sse_constant_p (x))
13092 return false;
13094 default:
13095 break;
13098 /* Otherwise we handle everything else in the move patterns. */
13099 return true;
13102 /* Determine if it's legal to put X into the constant pool. This
13103 is not possible for the address of thread-local symbols, which
13104 is checked above. */
13106 static bool
13107 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
13109 /* We can always put integral constants and vectors in memory. */
13110 switch (GET_CODE (x))
13112 case CONST_INT:
13113 case CONST_WIDE_INT:
13114 case CONST_DOUBLE:
13115 case CONST_VECTOR:
13116 return false;
13118 default:
13119 break;
13121 return !ix86_legitimate_constant_p (mode, x);
13124 /* Nonzero if the symbol is marked as dllimport or as a stub variable,
13125 otherwise zero. */
13127 static bool
13128 is_imported_p (rtx x)
13130 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
13131 || GET_CODE (x) != SYMBOL_REF)
13132 return false;
13134 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13138 /* Nonzero if the constant value X is a legitimate general operand
13139 when generating PIC code. It is given that flag_pic is on and
13140 that X satisfies CONSTANT_P. */
13142 bool
13143 legitimate_pic_operand_p (rtx x)
13145 rtx inner;
13147 switch (GET_CODE (x))
13149 case CONST:
13150 inner = XEXP (x, 0);
13151 if (GET_CODE (inner) == PLUS
13152 && CONST_INT_P (XEXP (inner, 1)))
13153 inner = XEXP (inner, 0);
13155 /* Only some unspecs are valid as "constants". */
13156 if (GET_CODE (inner) == UNSPEC)
13157 switch (XINT (inner, 1))
13159 case UNSPEC_GOT:
13160 case UNSPEC_GOTOFF:
13161 case UNSPEC_PLTOFF:
13162 return TARGET_64BIT;
13163 case UNSPEC_TPOFF:
13164 x = XVECEXP (inner, 0, 0);
13165 return (GET_CODE (x) == SYMBOL_REF
13166 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13167 case UNSPEC_MACHOPIC_OFFSET:
13168 return legitimate_pic_address_disp_p (x);
13169 default:
13170 return false;
13172 /* FALLTHRU */
13174 case SYMBOL_REF:
13175 case LABEL_REF:
13176 return legitimate_pic_address_disp_p (x);
13178 default:
13179 return true;
13183 /* Determine if a given CONST RTX is a valid memory displacement
13184 in PIC mode. */
13186 bool
13187 legitimate_pic_address_disp_p (rtx disp)
13189 bool saw_plus;
13191 /* In 64bit mode we can allow direct addresses of symbols and labels
13192 when they are not dynamic symbols. */
13193 if (TARGET_64BIT)
13195 rtx op0 = disp, op1;
13197 switch (GET_CODE (disp))
13199 case LABEL_REF:
13200 return true;
13202 case CONST:
13203 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13204 break;
13205 op0 = XEXP (XEXP (disp, 0), 0);
13206 op1 = XEXP (XEXP (disp, 0), 1);
13207 if (!CONST_INT_P (op1)
13208 || INTVAL (op1) >= 16*1024*1024
13209 || INTVAL (op1) < -16*1024*1024)
13210 break;
13211 if (GET_CODE (op0) == LABEL_REF)
13212 return true;
13213 if (GET_CODE (op0) == CONST
13214 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13215 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13216 return true;
13217 if (GET_CODE (op0) == UNSPEC
13218 && XINT (op0, 1) == UNSPEC_PCREL)
13219 return true;
13220 if (GET_CODE (op0) != SYMBOL_REF)
13221 break;
13222 /* FALLTHRU */
13224 case SYMBOL_REF:
13225 /* TLS references should always be enclosed in UNSPEC.
13226 The dllimported symbol always needs to be resolved. */
13227 if (SYMBOL_REF_TLS_MODEL (op0)
13228 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13229 return false;
13231 if (TARGET_PECOFF)
13233 if (is_imported_p (op0))
13234 return true;
13236 if (SYMBOL_REF_FAR_ADDR_P (op0)
13237 || !SYMBOL_REF_LOCAL_P (op0))
13238 break;
13240 /* Function symbols need to be resolved only for
13241 the large model.
13242 For the small model we don't need to resolve anything
13243 here. */
13244 if ((ix86_cmodel != CM_LARGE_PIC
13245 && SYMBOL_REF_FUNCTION_P (op0))
13246 || ix86_cmodel == CM_SMALL_PIC)
13247 return true;
13248 /* Non-external symbols don't need to be resolved for
13249 the large and medium models. */
13250 if ((ix86_cmodel == CM_LARGE_PIC
13251 || ix86_cmodel == CM_MEDIUM_PIC)
13252 && !SYMBOL_REF_EXTERNAL_P (op0))
13253 return true;
13255 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13256 && (SYMBOL_REF_LOCAL_P (op0)
13257 || (HAVE_LD_PIE_COPYRELOC
13258 && flag_pie
13259 && !SYMBOL_REF_WEAK (op0)
13260 && !SYMBOL_REF_FUNCTION_P (op0)))
13261 && ix86_cmodel != CM_LARGE_PIC)
13262 return true;
13263 break;
13265 default:
13266 break;
13269 if (GET_CODE (disp) != CONST)
13270 return false;
13271 disp = XEXP (disp, 0);
13273 if (TARGET_64BIT)
13275 /* It is not safe to allow PLUS expressions. This limits the allowed
13276 distance of GOT tables. We should not need these anyway. */
13277 if (GET_CODE (disp) != UNSPEC
13278 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13279 && XINT (disp, 1) != UNSPEC_GOTOFF
13280 && XINT (disp, 1) != UNSPEC_PCREL
13281 && XINT (disp, 1) != UNSPEC_PLTOFF))
13282 return false;
13284 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13285 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13286 return false;
13287 return true;
13290 saw_plus = false;
13291 if (GET_CODE (disp) == PLUS)
13293 if (!CONST_INT_P (XEXP (disp, 1)))
13294 return false;
13295 disp = XEXP (disp, 0);
13296 saw_plus = true;
13299 if (TARGET_MACHO && darwin_local_data_pic (disp))
13300 return true;
13302 if (GET_CODE (disp) != UNSPEC)
13303 return false;
13305 switch (XINT (disp, 1))
13307 case UNSPEC_GOT:
13308 if (saw_plus)
13309 return false;
13310 /* We need to check for both symbols and labels because VxWorks loads
13311 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13312 details. */
13313 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13314 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13315 case UNSPEC_GOTOFF:
13316 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13317 While the ABI also specifies a 32bit relocation, we don't produce it in
13318 the small PIC model at all. */
13319 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13320 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13321 && !TARGET_64BIT)
13322 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13323 return false;
13324 case UNSPEC_GOTTPOFF:
13325 case UNSPEC_GOTNTPOFF:
13326 case UNSPEC_INDNTPOFF:
13327 if (saw_plus)
13328 return false;
13329 disp = XVECEXP (disp, 0, 0);
13330 return (GET_CODE (disp) == SYMBOL_REF
13331 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13332 case UNSPEC_NTPOFF:
13333 disp = XVECEXP (disp, 0, 0);
13334 return (GET_CODE (disp) == SYMBOL_REF
13335 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13336 case UNSPEC_DTPOFF:
13337 disp = XVECEXP (disp, 0, 0);
13338 return (GET_CODE (disp) == SYMBOL_REF
13339 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13342 return false;
13345 /* Determine if op is a suitable RTX for an address register.
13346 Return naked register if a register or a register subreg is
13347 found, otherwise return NULL_RTX. */
13349 static rtx
13350 ix86_validate_address_register (rtx op)
13352 machine_mode mode = GET_MODE (op);
13354 /* Only SImode or DImode registers can form the address. */
13355 if (mode != SImode && mode != DImode)
13356 return NULL_RTX;
13358 if (REG_P (op))
13359 return op;
13360 else if (GET_CODE (op) == SUBREG)
13362 rtx reg = SUBREG_REG (op);
13364 if (!REG_P (reg))
13365 return NULL_RTX;
13367 mode = GET_MODE (reg);
13369 /* Don't allow SUBREGs that span more than a word. It can
13370 lead to spill failures when the register is one word out
13371 of a two word structure. */
13372 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13373 return NULL_RTX;
13375 /* Allow only SUBREGs of non-eliminable hard registers. */
13376 if (register_no_elim_operand (reg, mode))
13377 return reg;
13380 /* Op is not a register. */
13381 return NULL_RTX;
13384 /* Recognizes RTL expressions that are valid memory addresses for an
13385 instruction. The MODE argument is the machine mode for the MEM
13386 expression that wants to use this address.
13388 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13389 convert common non-canonical forms to canonical form so that they will
13390 be recognized. */
13392 static bool
13393 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13395 struct ix86_address parts;
13396 rtx base, index, disp;
13397 HOST_WIDE_INT scale;
13398 enum ix86_address_seg seg;
13400 if (ix86_decompose_address (addr, &parts) <= 0)
13401 /* Decomposition failed. */
13402 return false;
13404 base = parts.base;
13405 index = parts.index;
13406 disp = parts.disp;
13407 scale = parts.scale;
13408 seg = parts.seg;
13410 /* Validate base register. */
13411 if (base)
13413 rtx reg = ix86_validate_address_register (base);
13415 if (reg == NULL_RTX)
13416 return false;
13418 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13419 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13420 /* Base is not valid. */
13421 return false;
13424 /* Validate index register. */
13425 if (index)
13427 rtx reg = ix86_validate_address_register (index);
13429 if (reg == NULL_RTX)
13430 return false;
13432 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13433 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13434 /* Index is not valid. */
13435 return false;
13438 /* Index and base should have the same mode. */
13439 if (base && index
13440 && GET_MODE (base) != GET_MODE (index))
13441 return false;
13443 /* Address override works only on the (%reg) part of %fs:(%reg). */
13444 if (seg != SEG_DEFAULT
13445 && ((base && GET_MODE (base) != word_mode)
13446 || (index && GET_MODE (index) != word_mode)))
13447 return false;
13449 /* Validate scale factor. */
13450 if (scale != 1)
13452 if (!index)
13453 /* Scale without index. */
13454 return false;
13456 if (scale != 2 && scale != 4 && scale != 8)
13457 /* Scale is not a valid multiplier. */
13458 return false;
13461 /* Validate displacement. */
13462 if (disp)
13464 if (GET_CODE (disp) == CONST
13465 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13466 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13467 switch (XINT (XEXP (disp, 0), 1))
13469 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13470 used. While the ABI also specifies 32bit relocations, we don't produce
13471 them at all and use IP-relative addressing instead. */
13472 case UNSPEC_GOT:
13473 case UNSPEC_GOTOFF:
13474 gcc_assert (flag_pic);
13475 if (!TARGET_64BIT)
13476 goto is_legitimate_pic;
13478 /* 64bit address unspec. */
13479 return false;
13481 case UNSPEC_GOTPCREL:
13482 case UNSPEC_PCREL:
13483 gcc_assert (flag_pic);
13484 goto is_legitimate_pic;
13486 case UNSPEC_GOTTPOFF:
13487 case UNSPEC_GOTNTPOFF:
13488 case UNSPEC_INDNTPOFF:
13489 case UNSPEC_NTPOFF:
13490 case UNSPEC_DTPOFF:
13491 break;
13493 case UNSPEC_STACK_CHECK:
13494 gcc_assert (flag_split_stack);
13495 break;
13497 default:
13498 /* Invalid address unspec. */
13499 return false;
13502 else if (SYMBOLIC_CONST (disp)
13503 && (flag_pic
13504 || (TARGET_MACHO
13505 #if TARGET_MACHO
13506 && MACHOPIC_INDIRECT
13507 && !machopic_operand_p (disp)
13508 #endif
13512 is_legitimate_pic:
13513 if (TARGET_64BIT && (index || base))
13515 /* foo@dtpoff(%rX) is ok. */
13516 if (GET_CODE (disp) != CONST
13517 || GET_CODE (XEXP (disp, 0)) != PLUS
13518 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13519 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13520 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13521 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13522 /* Non-constant pic memory reference. */
13523 return false;
13525 else if ((!TARGET_MACHO || flag_pic)
13526 && ! legitimate_pic_address_disp_p (disp))
13527 /* Displacement is an invalid pic construct. */
13528 return false;
13529 #if TARGET_MACHO
13530 else if (MACHO_DYNAMIC_NO_PIC_P
13531 && !ix86_legitimate_constant_p (Pmode, disp))
13532 /* displacement must be referenced via non_lazy_pointer */
13533 return false;
13534 #endif
13536 /* This code used to verify that a symbolic pic displacement
13537 includes the pic_offset_table_rtx register.
13539 While this is a good idea, unfortunately these constructs may
13540 be created by the "adds using lea" optimization for incorrect
13541 code like:
13543 int a;
13544 int foo(int i)
13546 return *(&a+i);
13549 This code is nonsensical, but results in addressing the
13550 GOT table with a pic_offset_table_rtx base. We can't
13551 just refuse it easily, since it gets matched by the
13552 "addsi3" pattern, which later gets split to lea in case
13553 the output register differs from the input. While this
13554 could be handled by a separate addsi pattern for this case
13555 that never results in lea, disabling this test seems to be
13556 the easier and correct fix for the crash. */
13558 else if (GET_CODE (disp) != LABEL_REF
13559 && !CONST_INT_P (disp)
13560 && (GET_CODE (disp) != CONST
13561 || !ix86_legitimate_constant_p (Pmode, disp))
13562 && (GET_CODE (disp) != SYMBOL_REF
13563 || !ix86_legitimate_constant_p (Pmode, disp)))
13564 /* Displacement is not constant. */
13565 return false;
13566 else if (TARGET_64BIT
13567 && !x86_64_immediate_operand (disp, VOIDmode))
13568 /* Displacement is out of range. */
13569 return false;
13570 /* In x32 mode, constant addresses are sign extended to 64bit, so
13571 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13572 else if (TARGET_X32 && !(index || base)
13573 && CONST_INT_P (disp)
13574 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13575 return false;
13578 /* Everything looks valid. */
13579 return true;
13582 /* Determine if a given RTX is a valid constant address. */
13584 bool
13585 constant_address_p (rtx x)
13587 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13590 /* Return a unique alias set for the GOT. */
13592 static alias_set_type
13593 ix86_GOT_alias_set (void)
13595 static alias_set_type set = -1;
13596 if (set == -1)
13597 set = new_alias_set ();
13598 return set;
13601 /* Return a legitimate reference for ORIG (an address) using the
13602 register REG. If REG is 0, a new pseudo is generated.
13604 There are two types of references that must be handled:
13606 1. Global data references must load the address from the GOT, via
13607 the PIC reg. An insn is emitted to do this load, and the reg is
13608 returned.
13610 2. Static data references, constant pool addresses, and code labels
13611 compute the address as an offset from the GOT, whose base is in
13612 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13613 differentiate them from global data objects. The returned
13614 address is the PIC reg + an unspec constant.
13616 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13617 reg also appears in the address. */
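/* As an illustration (a sketch of the common 32-bit forms; the exact
   RTL depends on the target and code model):

     global data:  (mem (plus pic_reg (const (unspec [foo] UNSPEC_GOT))))
                   roughly  movl foo@GOT(%ebx), %reg
     local data:   (plus pic_reg (const (unspec [foo] UNSPEC_GOTOFF)))
                   roughly  leal foo@GOTOFF(%ebx), %reg  */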
13619 static rtx
13620 legitimize_pic_address (rtx orig, rtx reg)
13622 rtx addr = orig;
13623 rtx new_rtx = orig;
13625 #if TARGET_MACHO
13626 if (TARGET_MACHO && !TARGET_64BIT)
13628 if (reg == 0)
13629 reg = gen_reg_rtx (Pmode);
13630 /* Use the generic Mach-O PIC machinery. */
13631 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13633 #endif
13635 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13637 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13638 if (tmp)
13639 return tmp;
13642 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13643 new_rtx = addr;
13644 else if (TARGET_64BIT && !TARGET_PECOFF
13645 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13647 rtx tmpreg;
13648 /* This symbol may be referenced via a displacement from the PIC
13649 base address (@GOTOFF). */
13651 if (GET_CODE (addr) == CONST)
13652 addr = XEXP (addr, 0);
13653 if (GET_CODE (addr) == PLUS)
13655 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13656 UNSPEC_GOTOFF);
13657 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13659 else
13660 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13661 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13662 if (!reg)
13663 tmpreg = gen_reg_rtx (Pmode);
13664 else
13665 tmpreg = reg;
13666 emit_move_insn (tmpreg, new_rtx);
13668 if (reg != 0)
13670 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13671 tmpreg, 1, OPTAB_DIRECT);
13672 new_rtx = reg;
13674 else
13675 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13677 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13679 /* This symbol may be referenced via a displacement from the PIC
13680 base address (@GOTOFF). */
13682 if (GET_CODE (addr) == CONST)
13683 addr = XEXP (addr, 0);
13684 if (GET_CODE (addr) == PLUS)
13686 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13687 UNSPEC_GOTOFF);
13688 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13690 else
13691 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13692 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13693 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13695 if (reg != 0)
13697 emit_move_insn (reg, new_rtx);
13698 new_rtx = reg;
13701 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13702 /* We can't use @GOTOFF for text labels on VxWorks;
13703 see gotoff_operand. */
13704 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13706 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13707 if (tmp)
13708 return tmp;
13710 /* For x64 PE-COFF there is no GOT table. So we use the address
13711 directly. */
13712 if (TARGET_64BIT && TARGET_PECOFF)
13714 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13715 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13717 if (reg == 0)
13718 reg = gen_reg_rtx (Pmode);
13719 emit_move_insn (reg, new_rtx);
13720 new_rtx = reg;
13722 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13724 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13725 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13726 new_rtx = gen_const_mem (Pmode, new_rtx);
13727 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13729 if (reg == 0)
13730 reg = gen_reg_rtx (Pmode);
13731 /* Use gen_movsi directly, otherwise the address is loaded
13732 into a register for CSE. We don't want to CSE these addresses;
13733 instead we CSE addresses from the GOT table, so skip this. */
13734 emit_insn (gen_movsi (reg, new_rtx));
13735 new_rtx = reg;
13737 else
13739 /* This symbol must be referenced via a load from the
13740 Global Offset Table (@GOT). */
13742 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13743 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13744 if (TARGET_64BIT)
13745 new_rtx = force_reg (Pmode, new_rtx);
13746 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13747 new_rtx = gen_const_mem (Pmode, new_rtx);
13748 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13750 if (reg == 0)
13751 reg = gen_reg_rtx (Pmode);
13752 emit_move_insn (reg, new_rtx);
13753 new_rtx = reg;
13756 else
13758 if (CONST_INT_P (addr)
13759 && !x86_64_immediate_operand (addr, VOIDmode))
13761 if (reg)
13763 emit_move_insn (reg, addr);
13764 new_rtx = reg;
13766 else
13767 new_rtx = force_reg (Pmode, addr);
13769 else if (GET_CODE (addr) == CONST)
13771 addr = XEXP (addr, 0);
13773 /* We must match stuff we generate before. Assume the only
13774 unspecs that can get here are ours. Not that we could do
13775 anything with them anyway.... */
13776 if (GET_CODE (addr) == UNSPEC
13777 || (GET_CODE (addr) == PLUS
13778 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13779 return orig;
13780 gcc_assert (GET_CODE (addr) == PLUS);
13782 if (GET_CODE (addr) == PLUS)
13784 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13786 /* Check first to see if this is a constant offset from a @GOTOFF
13787 symbol reference. */
13788 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13789 && CONST_INT_P (op1))
13791 if (!TARGET_64BIT)
13793 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13794 UNSPEC_GOTOFF);
13795 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13796 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13797 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13799 if (reg != 0)
13801 emit_move_insn (reg, new_rtx);
13802 new_rtx = reg;
13805 else
13807 if (INTVAL (op1) < -16*1024*1024
13808 || INTVAL (op1) >= 16*1024*1024)
13810 if (!x86_64_immediate_operand (op1, Pmode))
13811 op1 = force_reg (Pmode, op1);
13812 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13816 else
13818 rtx base = legitimize_pic_address (op0, reg);
13819 machine_mode mode = GET_MODE (base);
13820 new_rtx
13821 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13823 if (CONST_INT_P (new_rtx))
13825 if (INTVAL (new_rtx) < -16*1024*1024
13826 || INTVAL (new_rtx) >= 16*1024*1024)
13828 if (!x86_64_immediate_operand (new_rtx, mode))
13829 new_rtx = force_reg (mode, new_rtx);
13830 new_rtx
13831 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13833 else
13834 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13836 else
13838 /* For %rip addressing, we have to use just disp32, with
13839 neither base nor index. */
13840 if (TARGET_64BIT
13841 && (GET_CODE (base) == SYMBOL_REF
13842 || GET_CODE (base) == LABEL_REF))
13843 base = force_reg (mode, base);
13844 if (GET_CODE (new_rtx) == PLUS
13845 && CONSTANT_P (XEXP (new_rtx, 1)))
13847 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13848 new_rtx = XEXP (new_rtx, 1);
13850 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13855 return new_rtx;
13858 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13860 static rtx
13861 get_thread_pointer (machine_mode tp_mode, bool to_reg)
13863 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13865 if (GET_MODE (tp) != tp_mode)
13867 gcc_assert (GET_MODE (tp) == SImode);
13868 gcc_assert (tp_mode == DImode);
13870 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13873 if (to_reg)
13874 tp = copy_to_mode_reg (tp_mode, tp);
13876 return tp;
13879 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13881 static GTY(()) rtx ix86_tls_symbol;
13883 static rtx
13884 ix86_tls_get_addr (void)
13886 if (!ix86_tls_symbol)
13888 const char *sym
13889 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13890 ? "___tls_get_addr" : "__tls_get_addr");
13892 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13895 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13897 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13898 UNSPEC_PLTOFF);
13899 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13900 gen_rtx_CONST (Pmode, unspec));
13903 return ix86_tls_symbol;
13906 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13908 static GTY(()) rtx ix86_tls_module_base_symbol;
13910 static rtx
13911 ix86_tls_module_base (void)
13913 if (!ix86_tls_module_base_symbol)
13915 ix86_tls_module_base_symbol
13916 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13918 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13919 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13922 return ix86_tls_module_base_symbol;
13925 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13926 false if we expect this to be used for a memory address and true if
13927 we expect to load the address into a register. */
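/* Broadly, the models handled below resolve as follows (a high-level
   sketch; the precise sequences vary with -m32/-m64 and TARGET_GNU2_TLS):
     global-dynamic: call __tls_get_addr for the symbol's address;
     local-dynamic:  call __tls_get_addr once for the module base and
                     add the symbol's DTPOFF offset;
     initial-exec:   load the TP offset from the GOT and add the thread
                     pointer;
     local-exec:     add a link-time constant offset to the thread
                     pointer.  */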
13929 static rtx
13930 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13932 rtx dest, base, off;
13933 rtx pic = NULL_RTX, tp = NULL_RTX;
13934 machine_mode tp_mode = Pmode;
13935 int type;
13937 /* Fall back to global dynamic model if tool chain cannot support local
13938 dynamic. */
13939 if (TARGET_SUN_TLS && !TARGET_64BIT
13940 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13941 && model == TLS_MODEL_LOCAL_DYNAMIC)
13942 model = TLS_MODEL_GLOBAL_DYNAMIC;
13944 switch (model)
13946 case TLS_MODEL_GLOBAL_DYNAMIC:
13947 dest = gen_reg_rtx (Pmode);
13949 if (!TARGET_64BIT)
13951 if (flag_pic && !TARGET_PECOFF)
13952 pic = pic_offset_table_rtx;
13953 else
13955 pic = gen_reg_rtx (Pmode);
13956 emit_insn (gen_set_got (pic));
13960 if (TARGET_GNU2_TLS)
13962 if (TARGET_64BIT)
13963 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13964 else
13965 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13967 tp = get_thread_pointer (Pmode, true);
13968 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13970 if (GET_MODE (x) != Pmode)
13971 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13973 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13975 else
13977 rtx caddr = ix86_tls_get_addr ();
13979 if (TARGET_64BIT)
13981 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13982 rtx_insn *insns;
13984 start_sequence ();
13985 emit_call_insn
13986 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13987 insns = get_insns ();
13988 end_sequence ();
13990 if (GET_MODE (x) != Pmode)
13991 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13993 RTL_CONST_CALL_P (insns) = 1;
13994 emit_libcall_block (insns, dest, rax, x);
13996 else
13997 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13999 break;
14001 case TLS_MODEL_LOCAL_DYNAMIC:
14002 base = gen_reg_rtx (Pmode);
14004 if (!TARGET_64BIT)
14006 if (flag_pic)
14007 pic = pic_offset_table_rtx;
14008 else
14010 pic = gen_reg_rtx (Pmode);
14011 emit_insn (gen_set_got (pic));
14015 if (TARGET_GNU2_TLS)
14017 rtx tmp = ix86_tls_module_base ();
14019 if (TARGET_64BIT)
14020 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
14021 else
14022 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
14024 tp = get_thread_pointer (Pmode, true);
14025 set_unique_reg_note (get_last_insn (), REG_EQUAL,
14026 gen_rtx_MINUS (Pmode, tmp, tp));
14028 else
14030 rtx caddr = ix86_tls_get_addr ();
14032 if (TARGET_64BIT)
14034 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14035 rtx_insn *insns;
14036 rtx eqv;
14038 start_sequence ();
14039 emit_call_insn
14040 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
14041 insns = get_insns ();
14042 end_sequence ();
14044 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
14045 share the LD_BASE result with other LD model accesses. */
14046 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
14047 UNSPEC_TLS_LD_BASE);
14049 RTL_CONST_CALL_P (insns) = 1;
14050 emit_libcall_block (insns, base, rax, eqv);
14052 else
14053 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
14056 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
14057 off = gen_rtx_CONST (Pmode, off);
14059 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
14061 if (TARGET_GNU2_TLS)
14063 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
14065 if (GET_MODE (x) != Pmode)
14066 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14068 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14070 break;
14072 case TLS_MODEL_INITIAL_EXEC:
14073 if (TARGET_64BIT)
14075 if (TARGET_SUN_TLS && !TARGET_X32)
14077 /* The Sun linker took the AMD64 TLS spec literally
14078 and can only handle %rax as destination of the
14079 initial executable code sequence. */
14081 dest = gen_reg_rtx (DImode);
14082 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14083 return dest;
14086 /* Generate DImode references to avoid %fs:(%reg32)
14087 problems and linker IE->LE relaxation bug. */
14088 tp_mode = DImode;
14089 pic = NULL;
14090 type = UNSPEC_GOTNTPOFF;
14092 else if (flag_pic)
14094 pic = pic_offset_table_rtx;
14095 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14097 else if (!TARGET_ANY_GNU_TLS)
14099 pic = gen_reg_rtx (Pmode);
14100 emit_insn (gen_set_got (pic));
14101 type = UNSPEC_GOTTPOFF;
14103 else
14105 pic = NULL;
14106 type = UNSPEC_INDNTPOFF;
14109 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14110 off = gen_rtx_CONST (tp_mode, off);
14111 if (pic)
14112 off = gen_rtx_PLUS (tp_mode, pic, off);
14113 off = gen_const_mem (tp_mode, off);
14114 set_mem_alias_set (off, ix86_GOT_alias_set ());
14116 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14118 base = get_thread_pointer (tp_mode,
14119 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14120 off = force_reg (tp_mode, off);
14121 return gen_rtx_PLUS (tp_mode, base, off);
14123 else
14125 base = get_thread_pointer (Pmode, true);
14126 dest = gen_reg_rtx (Pmode);
14127 emit_insn (ix86_gen_sub3 (dest, base, off));
14129 break;
14131 case TLS_MODEL_LOCAL_EXEC:
14132 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14133 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14134 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14135 off = gen_rtx_CONST (Pmode, off);
14137 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14139 base = get_thread_pointer (Pmode,
14140 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14141 return gen_rtx_PLUS (Pmode, base, off);
14143 else
14145 base = get_thread_pointer (Pmode, true);
14146 dest = gen_reg_rtx (Pmode);
14147 emit_insn (ix86_gen_sub3 (dest, base, off));
14149 break;
14151 default:
14152 gcc_unreachable ();
14155 return dest;
14158 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14159 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14160 unique refptr-DECL symbol corresponding to symbol DECL. */
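/* For instance (an illustration of the prefix logic in
   get_dllimport_decl below): for a symbol "foo", the BEIMPORT case
   references "*__imp_foo" (or "*__imp__foo" when a user label prefix
   is in use), while the refptr case references "*.refptr.foo" (or
   "*refptr.foo").  */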
14162 struct dllimport_hasher : ggc_cache_hasher<tree_map *>
14164 static inline hashval_t hash (tree_map *m) { return m->hash; }
14165 static inline bool
14166 equal (tree_map *a, tree_map *b)
14168 return a->base.from == b->base.from;
14171 static void
14172 handle_cache_entry (tree_map *&m)
14174 extern void gt_ggc_mx (tree_map *&);
14175 if (m == HTAB_EMPTY_ENTRY || m == HTAB_DELETED_ENTRY)
14176 return;
14177 else if (ggc_marked_p (m->base.from))
14178 gt_ggc_mx (m);
14179 else
14180 m = static_cast<tree_map *> (HTAB_DELETED_ENTRY);
14184 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14186 static tree
14187 get_dllimport_decl (tree decl, bool beimport)
14189 struct tree_map *h, in;
14190 const char *name;
14191 const char *prefix;
14192 size_t namelen, prefixlen;
14193 char *imp_name;
14194 tree to;
14195 rtx rtl;
14197 if (!dllimport_map)
14198 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14200 in.hash = htab_hash_pointer (decl);
14201 in.base.from = decl;
14202 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14203 h = *loc;
14204 if (h)
14205 return h->to;
14207 *loc = h = ggc_alloc<tree_map> ();
14208 h->hash = in.hash;
14209 h->base.from = decl;
14210 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14211 VAR_DECL, NULL, ptr_type_node);
14212 DECL_ARTIFICIAL (to) = 1;
14213 DECL_IGNORED_P (to) = 1;
14214 DECL_EXTERNAL (to) = 1;
14215 TREE_READONLY (to) = 1;
14217 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14218 name = targetm.strip_name_encoding (name);
14219 if (beimport)
14220 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14221 ? "*__imp_" : "*__imp__";
14222 else
14223 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14224 namelen = strlen (name);
14225 prefixlen = strlen (prefix);
14226 imp_name = (char *) alloca (namelen + prefixlen + 1);
14227 memcpy (imp_name, prefix, prefixlen);
14228 memcpy (imp_name + prefixlen, name, namelen + 1);
14230 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14231 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14232 SET_SYMBOL_REF_DECL (rtl, to);
14233 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14234 if (!beimport)
14236 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14237 #ifdef SUB_TARGET_RECORD_STUB
14238 SUB_TARGET_RECORD_STUB (name);
14239 #endif
14242 rtl = gen_const_mem (Pmode, rtl);
14243 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14245 SET_DECL_RTL (to, rtl);
14246 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14248 return to;
14251 /* Expand SYMBOL into its corresponding far-address symbol.
14252 WANT_REG is true if we require the result be a register. */
14254 static rtx
14255 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14257 tree imp_decl;
14258 rtx x;
14260 gcc_assert (SYMBOL_REF_DECL (symbol));
14261 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14263 x = DECL_RTL (imp_decl);
14264 if (want_reg)
14265 x = force_reg (Pmode, x);
14266 return x;
14269 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14270 true if we require the result be a register. */
14272 static rtx
14273 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14275 tree imp_decl;
14276 rtx x;
14278 gcc_assert (SYMBOL_REF_DECL (symbol));
14279 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14281 x = DECL_RTL (imp_decl);
14282 if (want_reg)
14283 x = force_reg (Pmode, x);
14284 return x;
14287 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
14288 is true if we require the result be a register. */
14290 static rtx
14291 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14293 if (!TARGET_PECOFF)
14294 return NULL_RTX;
14296 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14298 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14299 return legitimize_dllimport_symbol (addr, inreg);
14300 if (GET_CODE (addr) == CONST
14301 && GET_CODE (XEXP (addr, 0)) == PLUS
14302 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14303 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14305 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14306 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14310 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14311 return NULL_RTX;
14312 if (GET_CODE (addr) == SYMBOL_REF
14313 && !is_imported_p (addr)
14314 && SYMBOL_REF_EXTERNAL_P (addr)
14315 && SYMBOL_REF_DECL (addr))
14316 return legitimize_pe_coff_extern_decl (addr, inreg);
14318 if (GET_CODE (addr) == CONST
14319 && GET_CODE (XEXP (addr, 0)) == PLUS
14320 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14321 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14322 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14323 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14325 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14326 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14328 return NULL_RTX;
14331 /* Try machine-dependent ways of modifying an illegitimate address
14332 to be legitimate. If we find one, return the new, valid address.
14333 This macro is used in only one place: `memory_address' in explow.c.
14335 OLDX is the address as it was before break_out_memory_refs was called.
14336 In some cases it is useful to look at this to decide what needs to be done.
14338 It is always safe for this macro to do nothing. It exists to recognize
14339 opportunities to optimize the output.
14341 For the 80386, we handle X+REG by loading X into a register R and
14342 using R+REG. R will go in a general reg and indexing will be used.
14343 However, if REG is a broken-out memory address or multiplication,
14344 nothing needs to be done because REG can certainly go in a general reg.
14346 When -fpic is used, special handling is needed for symbolic references.
14347 See comments by legitimize_pic_address in i386.c for details. */
14349 static rtx
14350 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14352 bool changed = false;
14353 unsigned log;
14355 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14356 if (log)
14357 return legitimize_tls_address (x, (enum tls_model) log, false);
14358 if (GET_CODE (x) == CONST
14359 && GET_CODE (XEXP (x, 0)) == PLUS
14360 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14361 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14363 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14364 (enum tls_model) log, false);
14365 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14368 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14370 rtx tmp = legitimize_pe_coff_symbol (x, true);
14371 if (tmp)
14372 return tmp;
14375 if (flag_pic && SYMBOLIC_CONST (x))
14376 return legitimize_pic_address (x, 0);
14378 #if TARGET_MACHO
14379 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14380 return machopic_indirect_data_reference (x, 0);
14381 #endif
14383 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
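/* E.g. (ashift (reg) (const_int 3)) becomes (mult (reg) (const_int 8)),
   i.e. exactly the scale factors 1, 2, 4 and 8 that x86 addressing modes
   can encode.  */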
14384 if (GET_CODE (x) == ASHIFT
14385 && CONST_INT_P (XEXP (x, 1))
14386 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14388 changed = true;
14389 log = INTVAL (XEXP (x, 1));
14390 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14391 GEN_INT (1 << log));
14394 if (GET_CODE (x) == PLUS)
14396 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14398 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14399 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14400 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14402 changed = true;
14403 log = INTVAL (XEXP (XEXP (x, 0), 1));
14404 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14405 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14406 GEN_INT (1 << log));
14409 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14410 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14411 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14413 changed = true;
14414 log = INTVAL (XEXP (XEXP (x, 1), 1));
14415 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14416 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14417 GEN_INT (1 << log));
14420 /* Put multiply first if it isn't already. */
14421 if (GET_CODE (XEXP (x, 1)) == MULT)
14423 std::swap (XEXP (x, 0), XEXP (x, 1));
14424 changed = true;
14427 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14428 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14429 created by virtual register instantiation, register elimination, and
14430 similar optimizations. */
14431 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14433 changed = true;
14434 x = gen_rtx_PLUS (Pmode,
14435 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14436 XEXP (XEXP (x, 1), 0)),
14437 XEXP (XEXP (x, 1), 1));
14440 /* Canonicalize
14441 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14442 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14443 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14444 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14445 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14446 && CONSTANT_P (XEXP (x, 1)))
14448 rtx constant;
14449 rtx other = NULL_RTX;
14451 if (CONST_INT_P (XEXP (x, 1)))
14453 constant = XEXP (x, 1);
14454 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14456 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14458 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14459 other = XEXP (x, 1);
14461 else
14462 constant = 0;
14464 if (constant)
14466 changed = true;
14467 x = gen_rtx_PLUS (Pmode,
14468 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14469 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14470 plus_constant (Pmode, other,
14471 INTVAL (constant)));
14475 if (changed && ix86_legitimate_address_p (mode, x, false))
14476 return x;
14478 if (GET_CODE (XEXP (x, 0)) == MULT)
14480 changed = true;
14481 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14484 if (GET_CODE (XEXP (x, 1)) == MULT)
14486 changed = true;
14487 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14490 if (changed
14491 && REG_P (XEXP (x, 1))
14492 && REG_P (XEXP (x, 0)))
14493 return x;
14495 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14497 changed = true;
14498 x = legitimize_pic_address (x, 0);
14501 if (changed && ix86_legitimate_address_p (mode, x, false))
14502 return x;
14504 if (REG_P (XEXP (x, 0)))
14506 rtx temp = gen_reg_rtx (Pmode);
14507 rtx val = force_operand (XEXP (x, 1), temp);
14508 if (val != temp)
14510 val = convert_to_mode (Pmode, val, 1);
14511 emit_move_insn (temp, val);
14514 XEXP (x, 1) = temp;
14515 return x;
14518 else if (REG_P (XEXP (x, 1)))
14520 rtx temp = gen_reg_rtx (Pmode);
14521 rtx val = force_operand (XEXP (x, 0), temp);
14522 if (val != temp)
14524 val = convert_to_mode (Pmode, val, 1);
14525 emit_move_insn (temp, val);
14528 XEXP (x, 0) = temp;
14529 return x;
14533 return x;
14536 /* Print an integer constant expression in assembler syntax. Addition
14537 and subtraction are the only arithmetic that may appear in these
14538 expressions. FILE is the stdio stream to write to, X is the rtx, and
14539 CODE is the operand print code from the output string. */
14541 static void
14542 output_pic_addr_const (FILE *file, rtx x, int code)
14544 char buf[256];
14546 switch (GET_CODE (x))
14548 case PC:
14549 gcc_assert (flag_pic);
14550 putc ('.', file);
14551 break;
14553 case SYMBOL_REF:
14554 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14555 output_addr_const (file, x);
14556 else
14558 const char *name = XSTR (x, 0);
14560 /* Mark the decl as referenced so that cgraph will
14561 output the function. */
14562 if (SYMBOL_REF_DECL (x))
14563 mark_decl_referenced (SYMBOL_REF_DECL (x));
14565 #if TARGET_MACHO
14566 if (MACHOPIC_INDIRECT
14567 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14568 name = machopic_indirection_name (x, /*stub_p=*/true);
14569 #endif
14570 assemble_name (file, name);
14572 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14573 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14574 fputs ("@PLT", file);
14575 break;
14577 case LABEL_REF:
14578 x = XEXP (x, 0);
14579 /* FALLTHRU */
14580 case CODE_LABEL:
14581 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14582 assemble_name (asm_out_file, buf);
14583 break;
14585 case CONST_INT:
14586 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14587 break;
14589 case CONST:
14590 /* This used to output parentheses around the expression,
14591 but that does not work on the 386 (either ATT or BSD assembler). */
14592 output_pic_addr_const (file, XEXP (x, 0), code);
14593 break;
14595 case CONST_DOUBLE:
14596 /* We can't handle floating point constants;
14597 TARGET_PRINT_OPERAND must handle them. */
14598 output_operand_lossage ("floating constant misused");
14599 break;
14601 case PLUS:
14602 /* Some assemblers need integer constants to appear first. */
14603 if (CONST_INT_P (XEXP (x, 0)))
14605 output_pic_addr_const (file, XEXP (x, 0), code);
14606 putc ('+', file);
14607 output_pic_addr_const (file, XEXP (x, 1), code);
14609 else
14611 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14612 output_pic_addr_const (file, XEXP (x, 1), code);
14613 putc ('+', file);
14614 output_pic_addr_const (file, XEXP (x, 0), code);
14616 break;
14618 case MINUS:
14619 if (!TARGET_MACHO)
14620 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14621 output_pic_addr_const (file, XEXP (x, 0), code);
14622 putc ('-', file);
14623 output_pic_addr_const (file, XEXP (x, 1), code);
14624 if (!TARGET_MACHO)
14625 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14626 break;
14628 case UNSPEC:
14629 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14631 bool f = i386_asm_output_addr_const_extra (file, x);
14632 gcc_assert (f);
14633 break;
14636 gcc_assert (XVECLEN (x, 0) == 1);
14637 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14638 switch (XINT (x, 1))
14640 case UNSPEC_GOT:
14641 fputs ("@GOT", file);
14642 break;
14643 case UNSPEC_GOTOFF:
14644 fputs ("@GOTOFF", file);
14645 break;
14646 case UNSPEC_PLTOFF:
14647 fputs ("@PLTOFF", file);
14648 break;
14649 case UNSPEC_PCREL:
14650 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14651 "(%rip)" : "[rip]", file);
14652 break;
14653 case UNSPEC_GOTPCREL:
14654 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14655 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14656 break;
14657 case UNSPEC_GOTTPOFF:
14658 /* FIXME: This might be @TPOFF in Sun ld too. */
14659 fputs ("@gottpoff", file);
14660 break;
14661 case UNSPEC_TPOFF:
14662 fputs ("@tpoff", file);
14663 break;
14664 case UNSPEC_NTPOFF:
14665 if (TARGET_64BIT)
14666 fputs ("@tpoff", file);
14667 else
14668 fputs ("@ntpoff", file);
14669 break;
14670 case UNSPEC_DTPOFF:
14671 fputs ("@dtpoff", file);
14672 break;
14673 case UNSPEC_GOTNTPOFF:
14674 if (TARGET_64BIT)
14675 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14676 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14677 else
14678 fputs ("@gotntpoff", file);
14679 break;
14680 case UNSPEC_INDNTPOFF:
14681 fputs ("@indntpoff", file);
14682 break;
14683 #if TARGET_MACHO
14684 case UNSPEC_MACHOPIC_OFFSET:
14685 putc ('-', file);
14686 machopic_output_function_base_name (file);
14687 break;
14688 #endif
14689 default:
14690 output_operand_lossage ("invalid UNSPEC as operand");
14691 break;
14693 break;
14695 default:
14696 output_operand_lossage ("invalid expression as operand");
14700 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14701 We need to emit DTP-relative relocations. */
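/* Sketch of the output, assuming ASM_LONG is the ".long" directive on the
   targets that use this hook: SIZE == 4 emits ".long x@dtpoff", and
   SIZE == 8 additionally appends ", 0" to fill the upper half.  */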
14703 static void ATTRIBUTE_UNUSED
14704 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14706 fputs (ASM_LONG, file);
14707 output_addr_const (file, x);
14708 fputs ("@dtpoff", file);
14709 switch (size)
14711 case 4:
14712 break;
14713 case 8:
14714 fputs (", 0", file);
14715 break;
14716 default:
14717 gcc_unreachable ();
14721 /* Return true if X is a representation of the PIC register. This copes
14722 with calls from ix86_find_base_term, where the register might have
14723 been replaced by a cselib value. */
14725 static bool
14726 ix86_pic_register_p (rtx x)
14728 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14729 return (pic_offset_table_rtx
14730 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14731 else if (!REG_P (x))
14732 return false;
14733 else if (pic_offset_table_rtx)
14735 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14736 return true;
14737 if (HARD_REGISTER_P (x)
14738 && !HARD_REGISTER_P (pic_offset_table_rtx)
14739 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14740 return true;
14741 return false;
14743 else
14744 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14747 /* Helper function for ix86_delegitimize_address.
14748 Attempt to delegitimize TLS local-exec accesses. */
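/* Rough sketch, assuming the usual TLS segment setup: an address of the
   form seg:(base + index*scale + x@NTPOFF), as produced by
   legitimize_tls_address above, is mapped back to the SYMBOL_REF for x
   with any base, index and constant offset re-applied.  */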
14750 static rtx
14751 ix86_delegitimize_tls_address (rtx orig_x)
14753 rtx x = orig_x, unspec;
14754 struct ix86_address addr;
14756 if (!TARGET_TLS_DIRECT_SEG_REFS)
14757 return orig_x;
14758 if (MEM_P (x))
14759 x = XEXP (x, 0);
14760 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14761 return orig_x;
14762 if (ix86_decompose_address (x, &addr) == 0
14763 || addr.seg != DEFAULT_TLS_SEG_REG
14764 || addr.disp == NULL_RTX
14765 || GET_CODE (addr.disp) != CONST)
14766 return orig_x;
14767 unspec = XEXP (addr.disp, 0);
14768 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14769 unspec = XEXP (unspec, 0);
14770 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14771 return orig_x;
14772 x = XVECEXP (unspec, 0, 0);
14773 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14774 if (unspec != XEXP (addr.disp, 0))
14775 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14776 if (addr.index)
14778 rtx idx = addr.index;
14779 if (addr.scale != 1)
14780 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14781 x = gen_rtx_PLUS (Pmode, idx, x);
14783 if (addr.base)
14784 x = gen_rtx_PLUS (Pmode, addr.base, x);
14785 if (MEM_P (orig_x))
14786 x = replace_equiv_address_nv (orig_x, x);
14787 return x;
14790 /* In the name of slightly smaller debug output, and to cater to
14791 general assembler lossage, recognize PIC+GOTOFF and turn it back
14792 into a direct symbol reference.
14794 On Darwin, this is necessary to avoid a crash, because Darwin
14795 has a different PIC label for each routine but the DWARF debugging
14796 information is not associated with any particular routine, so it's
14797 necessary to remove references to the PIC label from RTL stored by
14798 the DWARF output code. */
14800 static rtx
14801 ix86_delegitimize_address (rtx x)
14803 rtx orig_x = delegitimize_mem_from_attrs (x);
14804 /* addend is NULL or some rtx if x is something+GOTOFF where
14805 something doesn't include the PIC register. */
14806 rtx addend = NULL_RTX;
14807 /* reg_addend is NULL or a multiple of some register. */
14808 rtx reg_addend = NULL_RTX;
14809 /* const_addend is NULL or a const_int. */
14810 rtx const_addend = NULL_RTX;
14811 /* This is the result, or NULL. */
14812 rtx result = NULL_RTX;
14814 x = orig_x;
14816 if (MEM_P (x))
14817 x = XEXP (x, 0);
14819 if (TARGET_64BIT)
14821 if (GET_CODE (x) == CONST
14822 && GET_CODE (XEXP (x, 0)) == PLUS
14823 && GET_MODE (XEXP (x, 0)) == Pmode
14824 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14825 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14826 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14828 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14829 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14830 if (MEM_P (orig_x))
14831 x = replace_equiv_address_nv (orig_x, x);
14832 return x;
14835 if (GET_CODE (x) == CONST
14836 && GET_CODE (XEXP (x, 0)) == UNSPEC
14837 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14838 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14839 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14841 x = XVECEXP (XEXP (x, 0), 0, 0);
14842 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14844 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14845 GET_MODE (x), 0);
14846 if (x == NULL_RTX)
14847 return orig_x;
14849 return x;
14852 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14853 return ix86_delegitimize_tls_address (orig_x);
14855 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14856 and -mcmodel=medium -fpic. */
14859 if (GET_CODE (x) != PLUS
14860 || GET_CODE (XEXP (x, 1)) != CONST)
14861 return ix86_delegitimize_tls_address (orig_x);
14863 if (ix86_pic_register_p (XEXP (x, 0)))
14864 /* %ebx + GOT/GOTOFF */
14866 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14868 /* %ebx + %reg * scale + GOT/GOTOFF */
14869 reg_addend = XEXP (x, 0);
14870 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14871 reg_addend = XEXP (reg_addend, 1);
14872 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14873 reg_addend = XEXP (reg_addend, 0);
14874 else
14876 reg_addend = NULL_RTX;
14877 addend = XEXP (x, 0);
14880 else
14881 addend = XEXP (x, 0);
14883 x = XEXP (XEXP (x, 1), 0);
14884 if (GET_CODE (x) == PLUS
14885 && CONST_INT_P (XEXP (x, 1)))
14887 const_addend = XEXP (x, 1);
14888 x = XEXP (x, 0);
14891 if (GET_CODE (x) == UNSPEC
14892 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14893 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14894 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14895 && !MEM_P (orig_x) && !addend)))
14896 result = XVECEXP (x, 0, 0);
14898 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14899 && !MEM_P (orig_x))
14900 result = XVECEXP (x, 0, 0);
14902 if (! result)
14903 return ix86_delegitimize_tls_address (orig_x);
14905 if (const_addend)
14906 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14907 if (reg_addend)
14908 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14909 if (addend)
14911 /* If the rest of original X doesn't involve the PIC register, add
14912 addend and subtract pic_offset_table_rtx. This can happen e.g.
14913 for code like:
14914 leal (%ebx, %ecx, 4), %ecx
14916 movl foo@GOTOFF(%ecx), %edx
14917 in which case we return (%ecx - %ebx) + foo
14918 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14919 and reload has completed. */
14920 if (pic_offset_table_rtx
14921 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14922 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14923 pic_offset_table_rtx),
14924 result);
14925 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14927 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14928 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14929 result = gen_rtx_PLUS (Pmode, tmp, result);
14931 else
14932 return orig_x;
14934 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14936 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14937 if (result == NULL_RTX)
14938 return orig_x;
14940 return result;
14943 /* If X is a machine specific address (i.e. a symbol or label being
14944 referenced as a displacement from the GOT implemented using an
14945 UNSPEC), then return the base term. Otherwise return X. */
14948 ix86_find_base_term (rtx x)
14950 rtx term;
14952 if (TARGET_64BIT)
14954 if (GET_CODE (x) != CONST)
14955 return x;
14956 term = XEXP (x, 0);
14957 if (GET_CODE (term) == PLUS
14958 && CONST_INT_P (XEXP (term, 1)))
14959 term = XEXP (term, 0);
14960 if (GET_CODE (term) != UNSPEC
14961 || (XINT (term, 1) != UNSPEC_GOTPCREL
14962 && XINT (term, 1) != UNSPEC_PCREL))
14963 return x;
14965 return XVECEXP (term, 0, 0);
14968 return ix86_delegitimize_address (x);
14971 static void
14972 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
14973 bool fp, FILE *file)
14975 const char *suffix;
14977 if (mode == CCFPmode || mode == CCFPUmode)
14979 code = ix86_fp_compare_code_to_integer (code);
14980 mode = CCmode;
14982 if (reverse)
14983 code = reverse_condition (code);
14985 switch (code)
14987 case EQ:
14988 switch (mode)
14990 case CCAmode:
14991 suffix = "a";
14992 break;
14994 case CCCmode:
14995 suffix = "c";
14996 break;
14998 case CCOmode:
14999 suffix = "o";
15000 break;
15002 case CCSmode:
15003 suffix = "s";
15004 break;
15006 default:
15007 suffix = "e";
15009 break;
15010 case NE:
15011 switch (mode)
15013 case CCAmode:
15014 suffix = "na";
15015 break;
15017 case CCCmode:
15018 suffix = "nc";
15019 break;
15021 case CCOmode:
15022 suffix = "no";
15023 break;
15025 case CCSmode:
15026 suffix = "ns";
15027 break;
15029 default:
15030 suffix = "ne";
15032 break;
15033 case GT:
15034 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
15035 suffix = "g";
15036 break;
15037 case GTU:
15038 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
15039 Those same assemblers have the same but opposite lossage on cmov. */
15040 if (mode == CCmode)
15041 suffix = fp ? "nbe" : "a";
15042 else
15043 gcc_unreachable ();
15044 break;
15045 case LT:
15046 switch (mode)
15048 case CCNOmode:
15049 case CCGOCmode:
15050 suffix = "s";
15051 break;
15053 case CCmode:
15054 case CCGCmode:
15055 suffix = "l";
15056 break;
15058 default:
15059 gcc_unreachable ();
15061 break;
15062 case LTU:
15063 if (mode == CCmode)
15064 suffix = "b";
15065 else if (mode == CCCmode)
15066 suffix = fp ? "b" : "c";
15067 else
15068 gcc_unreachable ();
15069 break;
15070 case GE:
15071 switch (mode)
15073 case CCNOmode:
15074 case CCGOCmode:
15075 suffix = "ns";
15076 break;
15078 case CCmode:
15079 case CCGCmode:
15080 suffix = "ge";
15081 break;
15083 default:
15084 gcc_unreachable ();
15086 break;
15087 case GEU:
15088 if (mode == CCmode)
15089 suffix = "nb";
15090 else if (mode == CCCmode)
15091 suffix = fp ? "nb" : "nc";
15092 else
15093 gcc_unreachable ();
15094 break;
15095 case LE:
15096 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15097 suffix = "le";
15098 break;
15099 case LEU:
15100 if (mode == CCmode)
15101 suffix = "be";
15102 else
15103 gcc_unreachable ();
15104 break;
15105 case UNORDERED:
15106 suffix = fp ? "u" : "p";
15107 break;
15108 case ORDERED:
15109 suffix = fp ? "nu" : "np";
15110 break;
15111 default:
15112 gcc_unreachable ();
15114 fputs (suffix, file);
15117 /* Print the name of register X to FILE based on its machine mode and number.
15118 If CODE is 'w', pretend the mode is HImode.
15119 If CODE is 'b', pretend the mode is QImode.
15120 If CODE is 'k', pretend the mode is SImode.
15121 If CODE is 'q', pretend the mode is DImode.
15122 If CODE is 'x', pretend the mode is V4SFmode.
15123 If CODE is 't', pretend the mode is V8SFmode.
15124 If CODE is 'g', pretend the mode is V16SFmode.
15125 If CODE is 'h', pretend the reg is the 'high' byte register.
15126 If CODE is 'y', print "st(0)" instead of "st" if the reg is the stack top.
15127 If CODE is 'd', duplicate the operand for AVX instruction.
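   For example, with CODE == 'k' a legacy register such as %rax is printed
   as "eax" while CODE == 'q' prints "rax"; REX registers instead get the
   "r8b"/"r8w"/"r8d" style suffixes handled near the end of this function.  */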
15130 void
15131 print_reg (rtx x, int code, FILE *file)
15133 const char *reg;
15134 int msize;
15135 unsigned int regno;
15136 bool duplicated;
15138 if (ASSEMBLER_DIALECT == ASM_ATT)
15139 putc ('%', file);
15141 if (x == pc_rtx)
15143 gcc_assert (TARGET_64BIT);
15144 fputs ("rip", file);
15145 return;
15148 if (code == 'y' && STACK_TOP_P (x))
15150 fputs ("st(0)", file);
15151 return;
15154 if (code == 'w')
15155 msize = 2;
15156 else if (code == 'b')
15157 msize = 1;
15158 else if (code == 'k')
15159 msize = 4;
15160 else if (code == 'q')
15161 msize = 8;
15162 else if (code == 'h')
15163 msize = 0;
15164 else if (code == 'x')
15165 msize = 16;
15166 else if (code == 't')
15167 msize = 32;
15168 else if (code == 'g')
15169 msize = 64;
15170 else
15171 msize = GET_MODE_SIZE (GET_MODE (x));
15173 regno = true_regnum (x);
15175 gcc_assert (regno != ARG_POINTER_REGNUM
15176 && regno != FRAME_POINTER_REGNUM
15177 && regno != FLAGS_REG
15178 && regno != FPSR_REG
15179 && regno != FPCR_REG);
15181 duplicated = code == 'd' && TARGET_AVX;
15183 switch (msize)
15185 case 8:
15186 case 4:
15187 if (LEGACY_INT_REGNO_P (regno))
15188 putc (msize == 8 ? 'r' : 'e', file);
15189 case 16:
15190 case 12:
15191 case 2:
15192 normal:
15193 reg = hi_reg_name[regno];
15194 break;
15195 case 1:
15196 if (regno >= ARRAY_SIZE (qi_reg_name))
15197 goto normal;
15198 reg = qi_reg_name[regno];
15199 break;
15200 case 0:
15201 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15202 goto normal;
15203 reg = qi_high_reg_name[regno];
15204 break;
15205 case 32:
15206 case 64:
15207 if (SSE_REGNO_P (regno))
15209 gcc_assert (!duplicated);
15210 putc (msize == 32 ? 'y' : 'z', file);
15211 reg = hi_reg_name[regno] + 1;
15212 break;
15214 goto normal;
15215 default:
15216 gcc_unreachable ();
15219 fputs (reg, file);
15221 /* Irritatingly, AMD extended registers use
15222 a different naming convention: "r%d[bwd]" */
15223 if (REX_INT_REGNO_P (regno))
15225 gcc_assert (TARGET_64BIT);
15226 switch (msize)
15228 case 0:
15229 error ("extended registers have no high halves");
15230 break;
15231 case 1:
15232 putc ('b', file);
15233 break;
15234 case 2:
15235 putc ('w', file);
15236 break;
15237 case 4:
15238 putc ('d', file);
15239 break;
15240 case 8:
15241 /* no suffix */
15242 break;
15243 default:
15244 error ("unsupported operand size for extended register");
15245 break;
15247 return;
15250 if (duplicated)
15252 if (ASSEMBLER_DIALECT == ASM_ATT)
15253 fprintf (file, ", %%%s", reg);
15254 else
15255 fprintf (file, ", %s", reg);
15259 /* Meaning of CODE:
15260 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15261 C -- print opcode suffix for set/cmov insn.
15262 c -- like C, but print reversed condition
15263 F,f -- likewise, but for floating-point.
15264 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15265 otherwise nothing
15266 R -- print embedded rounding and sae.
15267 r -- print only sae.
15268 z -- print the opcode suffix for the size of the current operand.
15269 Z -- likewise, with special suffixes for x87 instructions.
15270 * -- print a star (in certain assembler syntax)
15271 A -- print an absolute memory reference.
15272 E -- print address with DImode register names if TARGET_64BIT.
15273 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15274 s -- print a shift double count, followed by the assembler's argument
15275 delimiter.
15276 b -- print the QImode name of the register for the indicated operand.
15277 %b0 would print %al if operands[0] is reg 0.
15278 w -- likewise, print the HImode name of the register.
15279 k -- likewise, print the SImode name of the register.
15280 q -- likewise, print the DImode name of the register.
15281 x -- likewise, print the V4SFmode name of the register.
15282 t -- likewise, print the V8SFmode name of the register.
15283 g -- likewise, print the V16SFmode name of the register.
15284 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15285 y -- print "st(0)" instead of "st" as a register.
15286 d -- print duplicated register operand for AVX instruction.
15287 D -- print condition for SSE cmp instruction.
15288 P -- if PIC, print an @PLT suffix.
15289 p -- print raw symbol name.
15290 X -- don't print any sort of PIC '@' suffix for a symbol.
15291 & -- print some in-use local-dynamic symbol name.
15292 H -- print a memory address offset by 8; used for sse high-parts
15293 Y -- print condition for XOP pcom* instruction.
15294 + -- print a branch hint as 'cs' or 'ds' prefix
15295 ; -- print a semicolon (after prefixes due to a bug in older gas).
15296 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15297 @ -- print a segment register of thread base pointer load
15298 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15299 ! -- print MPX prefix for jxx/call/ret instructions if required.
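   For example, by analogy with the %b0 case above, "%k1" in an insn
   template prints operand 1 using its SImode register name and "%q1"
   its DImode name.  */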
15302 void
15303 ix86_print_operand (FILE *file, rtx x, int code)
15305 if (code)
15307 switch (code)
15309 case 'A':
15310 switch (ASSEMBLER_DIALECT)
15312 case ASM_ATT:
15313 putc ('*', file);
15314 break;
15316 case ASM_INTEL:
15317 /* Intel syntax. For absolute addresses, registers should not
15318 be surrounded by brackets. */
15319 if (!REG_P (x))
15321 putc ('[', file);
15322 ix86_print_operand (file, x, 0);
15323 putc (']', file);
15324 return;
15326 break;
15328 default:
15329 gcc_unreachable ();
15332 ix86_print_operand (file, x, 0);
15333 return;
15335 case 'E':
15336 /* Wrap address in an UNSPEC to declare special handling. */
15337 if (TARGET_64BIT)
15338 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15340 output_address (x);
15341 return;
15343 case 'L':
15344 if (ASSEMBLER_DIALECT == ASM_ATT)
15345 putc ('l', file);
15346 return;
15348 case 'W':
15349 if (ASSEMBLER_DIALECT == ASM_ATT)
15350 putc ('w', file);
15351 return;
15353 case 'B':
15354 if (ASSEMBLER_DIALECT == ASM_ATT)
15355 putc ('b', file);
15356 return;
15358 case 'Q':
15359 if (ASSEMBLER_DIALECT == ASM_ATT)
15360 putc ('l', file);
15361 return;
15363 case 'S':
15364 if (ASSEMBLER_DIALECT == ASM_ATT)
15365 putc ('s', file);
15366 return;
15368 case 'T':
15369 if (ASSEMBLER_DIALECT == ASM_ATT)
15370 putc ('t', file);
15371 return;
15373 case 'O':
15374 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15375 if (ASSEMBLER_DIALECT != ASM_ATT)
15376 return;
15378 switch (GET_MODE_SIZE (GET_MODE (x)))
15380 case 2:
15381 putc ('w', file);
15382 break;
15384 case 4:
15385 putc ('l', file);
15386 break;
15388 case 8:
15389 putc ('q', file);
15390 break;
15392 default:
15393 output_operand_lossage
15394 ("invalid operand size for operand code 'O'");
15395 return;
15398 putc ('.', file);
15399 #endif
15400 return;
15402 case 'z':
15403 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15405 /* Opcodes don't get size suffixes when using Intel syntax. */
15406 if (ASSEMBLER_DIALECT == ASM_INTEL)
15407 return;
15409 switch (GET_MODE_SIZE (GET_MODE (x)))
15411 case 1:
15412 putc ('b', file);
15413 return;
15415 case 2:
15416 putc ('w', file);
15417 return;
15419 case 4:
15420 putc ('l', file);
15421 return;
15423 case 8:
15424 putc ('q', file);
15425 return;
15427 default:
15428 output_operand_lossage
15429 ("invalid operand size for operand code 'z'");
15430 return;
15434 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15435 warning
15436 (0, "non-integer operand used with operand code 'z'");
15437 /* FALLTHRU */
15439 case 'Z':
15440 /* 387 opcodes don't get size suffixes when using Intel syntax. */
15441 if (ASSEMBLER_DIALECT == ASM_INTEL)
15442 return;
15444 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15446 switch (GET_MODE_SIZE (GET_MODE (x)))
15448 case 2:
15449 #ifdef HAVE_AS_IX86_FILDS
15450 putc ('s', file);
15451 #endif
15452 return;
15454 case 4:
15455 putc ('l', file);
15456 return;
15458 case 8:
15459 #ifdef HAVE_AS_IX86_FILDQ
15460 putc ('q', file);
15461 #else
15462 fputs ("ll", file);
15463 #endif
15464 return;
15466 default:
15467 break;
15470 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15472 /* 387 opcodes don't get size suffixes
15473 if the operands are registers. */
15474 if (STACK_REG_P (x))
15475 return;
15477 switch (GET_MODE_SIZE (GET_MODE (x)))
15479 case 4:
15480 putc ('s', file);
15481 return;
15483 case 8:
15484 putc ('l', file);
15485 return;
15487 case 12:
15488 case 16:
15489 putc ('t', file);
15490 return;
15492 default:
15493 break;
15496 else
15498 output_operand_lossage
15499 ("invalid operand type used with operand code 'Z'");
15500 return;
15503 output_operand_lossage
15504 ("invalid operand size for operand code 'Z'");
15505 return;
15507 case 'd':
15508 case 'b':
15509 case 'w':
15510 case 'k':
15511 case 'q':
15512 case 'h':
15513 case 't':
15514 case 'g':
15515 case 'y':
15516 case 'x':
15517 case 'X':
15518 case 'P':
15519 case 'p':
15520 break;
15522 case 's':
15523 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15525 ix86_print_operand (file, x, 0);
15526 fputs (", ", file);
15528 return;
15530 case 'Y':
15531 switch (GET_CODE (x))
15533 case NE:
15534 fputs ("neq", file);
15535 break;
15536 case EQ:
15537 fputs ("eq", file);
15538 break;
15539 case GE:
15540 case GEU:
15541 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15542 break;
15543 case GT:
15544 case GTU:
15545 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15546 break;
15547 case LE:
15548 case LEU:
15549 fputs ("le", file);
15550 break;
15551 case LT:
15552 case LTU:
15553 fputs ("lt", file);
15554 break;
15555 case UNORDERED:
15556 fputs ("unord", file);
15557 break;
15558 case ORDERED:
15559 fputs ("ord", file);
15560 break;
15561 case UNEQ:
15562 fputs ("ueq", file);
15563 break;
15564 case UNGE:
15565 fputs ("nlt", file);
15566 break;
15567 case UNGT:
15568 fputs ("nle", file);
15569 break;
15570 case UNLE:
15571 fputs ("ule", file);
15572 break;
15573 case UNLT:
15574 fputs ("ult", file);
15575 break;
15576 case LTGT:
15577 fputs ("une", file);
15578 break;
15579 default:
15580 output_operand_lossage ("operand is not a condition code, "
15581 "invalid operand code 'Y'");
15582 return;
15584 return;
15586 case 'D':
15587 /* A little bit of braindamage here. The SSE compare instructions
15588 use completely different names for the comparisons than the
15589 fp conditional moves do. */
15590 switch (GET_CODE (x))
15592 case UNEQ:
15593 if (TARGET_AVX)
15595 fputs ("eq_us", file);
15596 break;
15598 case EQ:
15599 fputs ("eq", file);
15600 break;
15601 case UNLT:
15602 if (TARGET_AVX)
15604 fputs ("nge", file);
15605 break;
15607 case LT:
15608 fputs ("lt", file);
15609 break;
15610 case UNLE:
15611 if (TARGET_AVX)
15613 fputs ("ngt", file);
15614 break;
15616 case LE:
15617 fputs ("le", file);
15618 break;
15619 case UNORDERED:
15620 fputs ("unord", file);
15621 break;
15622 case LTGT:
15623 if (TARGET_AVX)
15625 fputs ("neq_oq", file);
15626 break;
15628 case NE:
15629 fputs ("neq", file);
15630 break;
15631 case GE:
15632 if (TARGET_AVX)
15634 fputs ("ge", file);
15635 break;
15637 case UNGE:
15638 fputs ("nlt", file);
15639 break;
15640 case GT:
15641 if (TARGET_AVX)
15643 fputs ("gt", file);
15644 break;
15646 case UNGT:
15647 fputs ("nle", file);
15648 break;
15649 case ORDERED:
15650 fputs ("ord", file);
15651 break;
15652 default:
15653 output_operand_lossage ("operand is not a condition code, "
15654 "invalid operand code 'D'");
15655 return;
15657 return;
15659 case 'F':
15660 case 'f':
15661 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15662 if (ASSEMBLER_DIALECT == ASM_ATT)
15663 putc ('.', file);
15664 #endif
15666 case 'C':
15667 case 'c':
15668 if (!COMPARISON_P (x))
15670 output_operand_lossage ("operand is not a condition code, "
15671 "invalid operand code '%c'", code);
15672 return;
15674 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15675 code == 'c' || code == 'f',
15676 code == 'F' || code == 'f',
15677 file);
15678 return;
15680 case 'H':
15681 if (!offsettable_memref_p (x))
15683 output_operand_lossage ("operand is not an offsettable memory "
15684 "reference, invalid operand code 'H'");
15685 return;
15687 /* It doesn't actually matter what mode we use here, as we're
15688 only going to use this for printing. */
15689 x = adjust_address_nv (x, DImode, 8);
15690 /* Output 'qword ptr' for intel assembler dialect. */
15691 if (ASSEMBLER_DIALECT == ASM_INTEL)
15692 code = 'q';
15693 break;
15695 case 'K':
15696 gcc_assert (CONST_INT_P (x));
15698 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15699 #ifdef HAVE_AS_IX86_HLE
15700 fputs ("xacquire ", file);
15701 #else
15702 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15703 #endif
15704 else if (INTVAL (x) & IX86_HLE_RELEASE)
15705 #ifdef HAVE_AS_IX86_HLE
15706 fputs ("xrelease ", file);
15707 #else
15708 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15709 #endif
15710 /* We do not want to print the value of the operand. */
15711 return;
15713 case 'N':
15714 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15715 fputs ("{z}", file);
15716 return;
15718 case 'r':
15719 gcc_assert (CONST_INT_P (x));
15720 gcc_assert (INTVAL (x) == ROUND_SAE);
15722 if (ASSEMBLER_DIALECT == ASM_INTEL)
15723 fputs (", ", file);
15725 fputs ("{sae}", file);
15727 if (ASSEMBLER_DIALECT == ASM_ATT)
15728 fputs (", ", file);
15730 return;
15732 case 'R':
15733 gcc_assert (CONST_INT_P (x));
15735 if (ASSEMBLER_DIALECT == ASM_INTEL)
15736 fputs (", ", file);
15738 switch (INTVAL (x))
15740 case ROUND_NEAREST_INT | ROUND_SAE:
15741 fputs ("{rn-sae}", file);
15742 break;
15743 case ROUND_NEG_INF | ROUND_SAE:
15744 fputs ("{rd-sae}", file);
15745 break;
15746 case ROUND_POS_INF | ROUND_SAE:
15747 fputs ("{ru-sae}", file);
15748 break;
15749 case ROUND_ZERO | ROUND_SAE:
15750 fputs ("{rz-sae}", file);
15751 break;
15752 default:
15753 gcc_unreachable ();
15756 if (ASSEMBLER_DIALECT == ASM_ATT)
15757 fputs (", ", file);
15759 return;
15761 case '*':
15762 if (ASSEMBLER_DIALECT == ASM_ATT)
15763 putc ('*', file);
15764 return;
15766 case '&':
15768 const char *name = get_some_local_dynamic_name ();
15769 if (name == NULL)
15770 output_operand_lossage ("'%%&' used without any "
15771 "local dynamic TLS references");
15772 else
15773 assemble_name (file, name);
15774 return;
15777 case '+':
15779 rtx x;
15781 if (!optimize
15782 || optimize_function_for_size_p (cfun)
15783 || !TARGET_BRANCH_PREDICTION_HINTS)
15784 return;
15786 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15787 if (x)
15789 int pred_val = XINT (x, 0);
15791 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15792 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15794 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15795 bool cputaken
15796 = final_forward_branch_p (current_output_insn) == 0;
15798 /* Emit hints only in the case default branch prediction
15799 heuristics would fail. */
15800 if (taken != cputaken)
15802 /* We use 3e (DS) prefix for taken branches and
15803 2e (CS) prefix for not taken branches. */
15804 if (taken)
15805 fputs ("ds ; ", file);
15806 else
15807 fputs ("cs ; ", file);
15811 return;
15814 case ';':
15815 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15816 putc (';', file);
15817 #endif
15818 return;
15820 case '@':
15821 if (ASSEMBLER_DIALECT == ASM_ATT)
15822 putc ('%', file);
15824 /* The kernel uses a different segment register for performance
15825 reasons, so that a system call does not have to trash the userspace
15826 segment register, which would be expensive. */
15827 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15828 fputs ("fs", file);
15829 else
15830 fputs ("gs", file);
15831 return;
15833 case '~':
15834 putc (TARGET_AVX2 ? 'i' : 'f', file);
15835 return;
15837 case '^':
15838 if (TARGET_64BIT && Pmode != word_mode)
15839 fputs ("addr32 ", file);
15840 return;
15842 case '!':
15843 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15844 fputs ("bnd ", file);
15845 return;
15847 default:
15848 output_operand_lossage ("invalid operand code '%c'", code);
15852 if (REG_P (x))
15853 print_reg (x, code, file);
15855 else if (MEM_P (x))
15857 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15858 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15859 && GET_MODE (x) != BLKmode)
15861 const char * size;
15862 switch (GET_MODE_SIZE (GET_MODE (x)))
15864 case 1: size = "BYTE"; break;
15865 case 2: size = "WORD"; break;
15866 case 4: size = "DWORD"; break;
15867 case 8: size = "QWORD"; break;
15868 case 12: size = "TBYTE"; break;
15869 case 16:
15870 if (GET_MODE (x) == XFmode)
15871 size = "TBYTE";
15872 else
15873 size = "XMMWORD";
15874 break;
15875 case 32: size = "YMMWORD"; break;
15876 case 64: size = "ZMMWORD"; break;
15877 default:
15878 gcc_unreachable ();
15881 /* Check for explicit size override (codes 'b', 'w', 'k',
15882 'q' and 'x') */
15883 if (code == 'b')
15884 size = "BYTE";
15885 else if (code == 'w')
15886 size = "WORD";
15887 else if (code == 'k')
15888 size = "DWORD";
15889 else if (code == 'q')
15890 size = "QWORD";
15891 else if (code == 'x')
15892 size = "XMMWORD";
15894 fputs (size, file);
15895 fputs (" PTR ", file);
15898 x = XEXP (x, 0);
15899 /* Avoid (%rip) for call operands. */
15900 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15901 && !CONST_INT_P (x))
15902 output_addr_const (file, x);
15903 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15904 output_operand_lossage ("invalid constraints for operand");
15905 else
15906 output_address (x);
15909 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
15911 REAL_VALUE_TYPE r;
15912 long l;
15914 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15915 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15917 if (ASSEMBLER_DIALECT == ASM_ATT)
15918 putc ('$', file);
15919 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15920 if (code == 'q')
15921 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15922 (unsigned long long) (int) l);
15923 else
15924 fprintf (file, "0x%08x", (unsigned int) l);
15927 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
15929 REAL_VALUE_TYPE r;
15930 long l[2];
15932 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15933 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15935 if (ASSEMBLER_DIALECT == ASM_ATT)
15936 putc ('$', file);
15937 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15940 /* These float cases don't actually occur as immediate operands. */
15941 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
15943 char dstr[30];
15945 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15946 fputs (dstr, file);
15949 else
15951 /* We have patterns that allow zero sets of memory, for instance.
15952 In 64-bit mode, we should probably support all 8-byte vectors,
15953 since we can in fact encode that into an immediate. */
15954 if (GET_CODE (x) == CONST_VECTOR)
15956 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15957 x = const0_rtx;
15960 if (code != 'P' && code != 'p')
15962 if (CONST_INT_P (x))
15964 if (ASSEMBLER_DIALECT == ASM_ATT)
15965 putc ('$', file);
15967 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15968 || GET_CODE (x) == LABEL_REF)
15970 if (ASSEMBLER_DIALECT == ASM_ATT)
15971 putc ('$', file);
15972 else
15973 fputs ("OFFSET FLAT:", file);
15976 if (CONST_INT_P (x))
15977 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15978 else if (flag_pic || MACHOPIC_INDIRECT)
15979 output_pic_addr_const (file, x, code);
15980 else
15981 output_addr_const (file, x);
15985 static bool
15986 ix86_print_operand_punct_valid_p (unsigned char code)
15988 return (code == '@' || code == '*' || code == '+' || code == '&'
15989 || code == ';' || code == '~' || code == '^' || code == '!');
15992 /* Print a memory operand whose address is ADDR. */
15994 static void
15995 ix86_print_operand_address (FILE *file, rtx addr)
15997 struct ix86_address parts;
15998 rtx base, index, disp;
15999 int scale;
16000 int ok;
16001 bool vsib = false;
16002 int code = 0;
16004 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
16006 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16007 gcc_assert (parts.index == NULL_RTX);
16008 parts.index = XVECEXP (addr, 0, 1);
16009 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
16010 addr = XVECEXP (addr, 0, 0);
16011 vsib = true;
16013 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
16015 gcc_assert (TARGET_64BIT);
16016 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16017 code = 'q';
16019 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
16021 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
16022 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
16023 if (parts.base != NULL_RTX)
16025 parts.index = parts.base;
16026 parts.scale = 1;
16028 parts.base = XVECEXP (addr, 0, 0);
16029 addr = XVECEXP (addr, 0, 0);
16031 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
16033 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16034 gcc_assert (parts.index == NULL_RTX);
16035 parts.index = XVECEXP (addr, 0, 1);
16036 addr = XVECEXP (addr, 0, 0);
16038 else
16039 ok = ix86_decompose_address (addr, &parts);
16041 gcc_assert (ok);
16043 base = parts.base;
16044 index = parts.index;
16045 disp = parts.disp;
16046 scale = parts.scale;
16048 switch (parts.seg)
16050 case SEG_DEFAULT:
16051 break;
16052 case SEG_FS:
16053 case SEG_GS:
16054 if (ASSEMBLER_DIALECT == ASM_ATT)
16055 putc ('%', file);
16056 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16057 break;
16058 default:
16059 gcc_unreachable ();
16062 /* Use one byte shorter RIP relative addressing for 64bit mode. */
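/* E.g. emit "foo(%rip)" (base becomes pc_rtx below) instead of an absolute
   32-bit displacement, which in 64-bit mode would also require a SIB byte.  */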
16063 if (TARGET_64BIT && !base && !index)
16065 rtx symbol = disp;
16067 if (GET_CODE (disp) == CONST
16068 && GET_CODE (XEXP (disp, 0)) == PLUS
16069 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16070 symbol = XEXP (XEXP (disp, 0), 0);
16072 if (GET_CODE (symbol) == LABEL_REF
16073 || (GET_CODE (symbol) == SYMBOL_REF
16074 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16075 base = pc_rtx;
16077 if (!base && !index)
16079 /* A displacement-only address requires special attention. */
16081 if (CONST_INT_P (disp))
16083 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16084 fputs ("ds:", file);
16085 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16087 else if (flag_pic)
16088 output_pic_addr_const (file, disp, 0);
16089 else
16090 output_addr_const (file, disp);
16092 else
16094 /* Print SImode register names to force addr32 prefix. */
16095 if (SImode_address_operand (addr, VOIDmode))
16097 #ifdef ENABLE_CHECKING
16098 gcc_assert (TARGET_64BIT);
16099 switch (GET_CODE (addr))
16101 case SUBREG:
16102 gcc_assert (GET_MODE (addr) == SImode);
16103 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16104 break;
16105 case ZERO_EXTEND:
16106 case AND:
16107 gcc_assert (GET_MODE (addr) == DImode);
16108 break;
16109 default:
16110 gcc_unreachable ();
16112 #endif
16113 gcc_assert (!code);
16114 code = 'k';
16116 else if (code == 0
16117 && TARGET_X32
16118 && disp
16119 && CONST_INT_P (disp)
16120 && INTVAL (disp) < -16*1024*1024)
16122 /* X32 runs in 64-bit mode, where displacement, DISP, in
16123 address DISP(%r64), is encoded as 32-bit immediate sign-
16124 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16125 address is %r64 + 0xffffffffbffffd00. When %r64 <
16126 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16127 which is invalid for x32. The correct address is %r64
16128 - 0x40000300 == 0xf7ffdd64. To properly encode
16129 -0x40000300(%r64) for x32, we zero-extend negative
16130 displacement by forcing addr32 prefix which truncates
16131 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16132 zero-extend all negative displacements, including -1(%rsp).
16133 However, for small negative displacements, sign-extension
16134 won't cause overflow. We only zero-extend negative
16135 displacements if they are < -16*1024*1024, which is also used
16136 to check legitimate address displacements for PIC. */
16137 code = 'k';
16140 if (ASSEMBLER_DIALECT == ASM_ATT)
16142 if (disp)
16144 if (flag_pic)
16145 output_pic_addr_const (file, disp, 0);
16146 else if (GET_CODE (disp) == LABEL_REF)
16147 output_asm_label (disp);
16148 else
16149 output_addr_const (file, disp);
16152 putc ('(', file);
16153 if (base)
16154 print_reg (base, code, file);
16155 if (index)
16157 putc (',', file);
16158 print_reg (index, vsib ? 0 : code, file);
16159 if (scale != 1 || vsib)
16160 fprintf (file, ",%d", scale);
16162 putc (')', file);
16164 else
16166 rtx offset = NULL_RTX;
16168 if (disp)
16170 /* Pull out the offset of a symbol; print any symbol itself. */
16171 if (GET_CODE (disp) == CONST
16172 && GET_CODE (XEXP (disp, 0)) == PLUS
16173 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16175 offset = XEXP (XEXP (disp, 0), 1);
16176 disp = gen_rtx_CONST (VOIDmode,
16177 XEXP (XEXP (disp, 0), 0));
16180 if (flag_pic)
16181 output_pic_addr_const (file, disp, 0);
16182 else if (GET_CODE (disp) == LABEL_REF)
16183 output_asm_label (disp);
16184 else if (CONST_INT_P (disp))
16185 offset = disp;
16186 else
16187 output_addr_const (file, disp);
16190 putc ('[', file);
16191 if (base)
16193 print_reg (base, code, file);
16194 if (offset)
16196 if (INTVAL (offset) >= 0)
16197 putc ('+', file);
16198 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16201 else if (offset)
16202 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16203 else
16204 putc ('0', file);
16206 if (index)
16208 putc ('+', file);
16209 print_reg (index, vsib ? 0 : code, file);
16210 if (scale != 1 || vsib)
16211 fprintf (file, "*%d", scale);
16213 putc (']', file);
16218 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
16220 static bool
16221 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16223 rtx op;
16225 if (GET_CODE (x) != UNSPEC)
16226 return false;
16228 op = XVECEXP (x, 0, 0);
16229 switch (XINT (x, 1))
16231 case UNSPEC_GOTTPOFF:
16232 output_addr_const (file, op);
16233 /* FIXME: This might be @TPOFF in Sun ld. */
16234 fputs ("@gottpoff", file);
16235 break;
16236 case UNSPEC_TPOFF:
16237 output_addr_const (file, op);
16238 fputs ("@tpoff", file);
16239 break;
16240 case UNSPEC_NTPOFF:
16241 output_addr_const (file, op);
16242 if (TARGET_64BIT)
16243 fputs ("@tpoff", file);
16244 else
16245 fputs ("@ntpoff", file);
16246 break;
16247 case UNSPEC_DTPOFF:
16248 output_addr_const (file, op);
16249 fputs ("@dtpoff", file);
16250 break;
16251 case UNSPEC_GOTNTPOFF:
16252 output_addr_const (file, op);
16253 if (TARGET_64BIT)
16254 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16255 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16256 else
16257 fputs ("@gotntpoff", file);
16258 break;
16259 case UNSPEC_INDNTPOFF:
16260 output_addr_const (file, op);
16261 fputs ("@indntpoff", file);
16262 break;
16263 #if TARGET_MACHO
16264 case UNSPEC_MACHOPIC_OFFSET:
16265 output_addr_const (file, op);
16266 putc ('-', file);
16267 machopic_output_function_base_name (file);
16268 break;
16269 #endif
16271 case UNSPEC_STACK_CHECK:
16273 int offset;
16275 gcc_assert (flag_split_stack);
16277 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16278 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16279 #else
16280 gcc_unreachable ();
16281 #endif
16283 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16285 break;
16287 default:
16288 return false;
16291 return true;
16294 /* Split one or more double-mode RTL references into pairs of half-mode
16295 references. The RTL can be REG, offsettable MEM, integer constant, or
16296 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16297 split and "num" is its length. lo_half and hi_half are output arrays
16298 that parallel "operands". */
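/* For example, splitting a DImode operand on a 32-bit target yields two
   SImode halves: for a MEM they are at offsets 0 and 4 via adjust_address,
   while REGs and constants go through simplify_gen_subreg below.  */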
16300 void
16301 split_double_mode (machine_mode mode, rtx operands[],
16302 int num, rtx lo_half[], rtx hi_half[])
16304 machine_mode half_mode;
16305 unsigned int byte;
16307 switch (mode)
16309 case TImode:
16310 half_mode = DImode;
16311 break;
16312 case DImode:
16313 half_mode = SImode;
16314 break;
16315 default:
16316 gcc_unreachable ();
16319 byte = GET_MODE_SIZE (half_mode);
16321 while (num--)
16323 rtx op = operands[num];
16325 /* simplify_subreg refuses to split volatile memory addresses,
16326 but we still have to handle them. */
16327 if (MEM_P (op))
16329 lo_half[num] = adjust_address (op, half_mode, 0);
16330 hi_half[num] = adjust_address (op, half_mode, byte);
16332 else
16334 lo_half[num] = simplify_gen_subreg (half_mode, op,
16335 GET_MODE (op) == VOIDmode
16336 ? mode : GET_MODE (op), 0);
16337 hi_half[num] = simplify_gen_subreg (half_mode, op,
16338 GET_MODE (op) == VOIDmode
16339 ? mode : GET_MODE (op), byte);
16344 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16345 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16346 is the expression of the binary operation. The output may either be
16347 emitted here, or returned to the caller, like all output_* functions.
16349 There is no guarantee that the operands are the same mode, as they
16350 might be within FLOAT or FLOAT_EXTEND expressions. */
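/* For illustration: a DFmode PLUS handled below returns
   "vaddsd\t{%2, %1, %0|%0, %1, %2}" with AVX, "addsd\t{%2, %0|%0, %2}"
   with plain SSE, and an "fadd"/"fiadd" x87 form whose suffix and operand
   template are chosen from the cases further down.  */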
16352 #ifndef SYSV386_COMPAT
16353 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16354 wants to fix the assemblers because that causes incompatibility
16355 with gcc. No-one wants to fix gcc because that causes
16356 incompatibility with assemblers... You can use the option of
16357 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16358 #define SYSV386_COMPAT 1
16359 #endif
16361 const char *
16362 output_387_binary_op (rtx insn, rtx *operands)
16364 static char buf[40];
16365 const char *p;
16366 const char *ssep;
16367 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16369 #ifdef ENABLE_CHECKING
16370 /* Even if we do not want to check the inputs, this documents input
16371 constraints, which helps in understanding the following code. */
16372 if (STACK_REG_P (operands[0])
16373 && ((REG_P (operands[1])
16374 && REGNO (operands[0]) == REGNO (operands[1])
16375 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16376 || (REG_P (operands[2])
16377 && REGNO (operands[0]) == REGNO (operands[2])
16378 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16379 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16380 ; /* ok */
16381 else
16382 gcc_assert (is_sse);
16383 #endif
16385 switch (GET_CODE (operands[3]))
16387 case PLUS:
16388 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16389 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16390 p = "fiadd";
16391 else
16392 p = "fadd";
16393 ssep = "vadd";
16394 break;
16396 case MINUS:
16397 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16398 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16399 p = "fisub";
16400 else
16401 p = "fsub";
16402 ssep = "vsub";
16403 break;
16405 case MULT:
16406 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16407 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16408 p = "fimul";
16409 else
16410 p = "fmul";
16411 ssep = "vmul";
16412 break;
16414 case DIV:
16415 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16416 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16417 p = "fidiv";
16418 else
16419 p = "fdiv";
16420 ssep = "vdiv";
16421 break;
16423 default:
16424 gcc_unreachable ();
16427 if (is_sse)
16429 if (TARGET_AVX)
16431 strcpy (buf, ssep);
16432 if (GET_MODE (operands[0]) == SFmode)
16433 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16434 else
16435 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16437 else
16439 strcpy (buf, ssep + 1);
16440 if (GET_MODE (operands[0]) == SFmode)
16441 strcat (buf, "ss\t{%2, %0|%0, %2}");
16442 else
16443 strcat (buf, "sd\t{%2, %0|%0, %2}");
16445 return buf;
16447 strcpy (buf, p);
16449 switch (GET_CODE (operands[3]))
16451 case MULT:
16452 case PLUS:
16453 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16454 std::swap (operands[1], operands[2]);
16456 /* We know operands[0] == operands[1]. */
16458 if (MEM_P (operands[2]))
16460 p = "%Z2\t%2";
16461 break;
16464 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16466 if (STACK_TOP_P (operands[0]))
16467 /* How is it that we are storing to a dead operand[2]?
16468 Well, presumably operands[1] is dead too. We can't
16469 store the result to st(0) as st(0) gets popped on this
16470 instruction. Instead store to operands[2] (which I
16471 think has to be st(1)). st(1) will be popped later.
16472 gcc <= 2.8.1 didn't have this check and generated
16473 assembly code that the Unixware assembler rejected. */
16474 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16475 else
16476 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16477 break;
16480 if (STACK_TOP_P (operands[0]))
16481 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16482 else
16483 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16484 break;
16486 case MINUS:
16487 case DIV:
16488 if (MEM_P (operands[1]))
16490 p = "r%Z1\t%1";
16491 break;
16494 if (MEM_P (operands[2]))
16496 p = "%Z2\t%2";
16497 break;
16500 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16502 #if SYSV386_COMPAT
16503 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16504 derived assemblers, confusingly reverse the direction of
16505 the operation for fsub{r} and fdiv{r} when the
16506 destination register is not st(0). The Intel assembler
16507 doesn't have this brain damage. Read !SYSV386_COMPAT to
16508 figure out what the hardware really does. */
16509 if (STACK_TOP_P (operands[0]))
16510 p = "{p\t%0, %2|rp\t%2, %0}";
16511 else
16512 p = "{rp\t%2, %0|p\t%0, %2}";
16513 #else
16514 if (STACK_TOP_P (operands[0]))
16515 /* As above for fmul/fadd, we can't store to st(0). */
16516 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16517 else
16518 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16519 #endif
16520 break;
16523 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16525 #if SYSV386_COMPAT
16526 if (STACK_TOP_P (operands[0]))
16527 p = "{rp\t%0, %1|p\t%1, %0}";
16528 else
16529 p = "{p\t%1, %0|rp\t%0, %1}";
16530 #else
16531 if (STACK_TOP_P (operands[0]))
16532 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16533 else
16534 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16535 #endif
16536 break;
16539 if (STACK_TOP_P (operands[0]))
16541 if (STACK_TOP_P (operands[1]))
16542 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16543 else
16544 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16545 break;
16547 else if (STACK_TOP_P (operands[1]))
16549 #if SYSV386_COMPAT
16550 p = "{\t%1, %0|r\t%0, %1}";
16551 #else
16552 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16553 #endif
16555 else
16557 #if SYSV386_COMPAT
16558 p = "{r\t%2, %0|\t%0, %2}";
16559 #else
16560 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16561 #endif
16563 break;
16565 default:
16566 gcc_unreachable ();
16569 strcat (buf, p);
16570 return buf;
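/* Illustrative example (assumed operands): for an SFmode PLUS with AVX
   enabled, the code above builds "vaddss\t{%2, %1, %0|%0, %1, %2}",
   which for xmm register operands prints roughly
   "vaddss %xmm2, %xmm1, %xmm0" in AT&T syntax.  */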
16573 /* Check if a 256bit AVX register is referenced inside of EXP. */
16575 static bool
16576 ix86_check_avx256_register (const_rtx exp)
16578 if (GET_CODE (exp) == SUBREG)
16579 exp = SUBREG_REG (exp);
16581 return (REG_P (exp)
16582 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16585 /* Return needed mode for entity in optimize_mode_switching pass. */
16587 static int
16588 ix86_avx_u128_mode_needed (rtx_insn *insn)
16590 if (CALL_P (insn))
16592 rtx link;
16594 /* Needed mode is set to AVX_U128_CLEAN if there are
16595 no 256bit modes used in function arguments. */
16596 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16597 link;
16598 link = XEXP (link, 1))
16600 if (GET_CODE (XEXP (link, 0)) == USE)
16602 rtx arg = XEXP (XEXP (link, 0), 0);
16604 if (ix86_check_avx256_register (arg))
16605 return AVX_U128_DIRTY;
16609 return AVX_U128_CLEAN;
16612 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16613 changes state only when a 256bit register is written to, but we need
16614 to prevent the compiler from moving the optimal insertion point above
16615 a possible read from a 256bit register. */
16616 subrtx_iterator::array_type array;
16617 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16618 if (ix86_check_avx256_register (*iter))
16619 return AVX_U128_DIRTY;
16621 return AVX_U128_ANY;
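/* Background sketch (illustrative): on microarchitectures where mixing
   a dirty 256bit upper state with legacy SSE code is costly, the mode
   switching pass uses the DIRTY/CLEAN states computed here to insert
   vzeroupper at transition points, e.g.

       vaddps  %ymm1, %ymm0, %ymm0    # upper halves become DIRTY
       vzeroupper                     # inserted at the DIRTY->CLEAN edge
       call    foo                    # callee may use legacy SSE  */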
16624 /* Return mode that i387 must be switched into
16625 prior to the execution of insn. */
16627 static int
16628 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16630 enum attr_i387_cw mode;
16632 /* The mode UNINITIALIZED is used to store the control word after a
16633 function call or ASM pattern. The mode ANY specifies that the
16634 insn has no requirements on the control word and makes no changes to
16635 the bits we are interested in. */
16637 if (CALL_P (insn)
16638 || (NONJUMP_INSN_P (insn)
16639 && (asm_noperands (PATTERN (insn)) >= 0
16640 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16641 return I387_CW_UNINITIALIZED;
16643 if (recog_memoized (insn) < 0)
16644 return I387_CW_ANY;
16646 mode = get_attr_i387_cw (insn);
16648 switch (entity)
16650 case I387_TRUNC:
16651 if (mode == I387_CW_TRUNC)
16652 return mode;
16653 break;
16655 case I387_FLOOR:
16656 if (mode == I387_CW_FLOOR)
16657 return mode;
16658 break;
16660 case I387_CEIL:
16661 if (mode == I387_CW_CEIL)
16662 return mode;
16663 break;
16665 case I387_MASK_PM:
16666 if (mode == I387_CW_MASK_PM)
16667 return mode;
16668 break;
16670 default:
16671 gcc_unreachable ();
16674 return I387_CW_ANY;
16677 /* Return mode that entity must be switched into
16678 prior to the execution of insn. */
16680 static int
16681 ix86_mode_needed (int entity, rtx_insn *insn)
16683 switch (entity)
16685 case AVX_U128:
16686 return ix86_avx_u128_mode_needed (insn);
16687 case I387_TRUNC:
16688 case I387_FLOOR:
16689 case I387_CEIL:
16690 case I387_MASK_PM:
16691 return ix86_i387_mode_needed (entity, insn);
16692 default:
16693 gcc_unreachable ();
16695 return 0;
16698 /* Check if a 256bit AVX register is referenced in stores. */
16700 static void
16701 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16703 if (ix86_check_avx256_register (dest))
16705 bool *used = (bool *) data;
16706 *used = true;
16710 /* Calculate mode of upper 128bit AVX registers after the insn. */
16712 static int
16713 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16715 rtx pat = PATTERN (insn);
16717 if (vzeroupper_operation (pat, VOIDmode)
16718 || vzeroall_operation (pat, VOIDmode))
16719 return AVX_U128_CLEAN;
16721 /* We know that the state is clean after a CALL insn if the function
16722 return value does not use a 256bit register. */
16723 if (CALL_P (insn))
16725 bool avx_reg256_found = false;
16726 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16728 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16731 /* Otherwise, return current mode. Remember that if insn
16732 references AVX 256bit registers, the mode was already changed
16733 to DIRTY from MODE_NEEDED. */
16734 return mode;
16737 /* Return the mode that an insn results in. */
16739 static int
16740 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16742 switch (entity)
16744 case AVX_U128:
16745 return ix86_avx_u128_mode_after (mode, insn);
16746 case I387_TRUNC:
16747 case I387_FLOOR:
16748 case I387_CEIL:
16749 case I387_MASK_PM:
16750 return mode;
16751 default:
16752 gcc_unreachable ();
16756 static int
16757 ix86_avx_u128_mode_entry (void)
16759 tree arg;
16761 /* Entry mode is set to AVX_U128_DIRTY if there are
16762 256bit modes used in function arguments. */
16763 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16764 arg = TREE_CHAIN (arg))
16766 rtx incoming = DECL_INCOMING_RTL (arg);
16768 if (incoming && ix86_check_avx256_register (incoming))
16769 return AVX_U128_DIRTY;
16772 return AVX_U128_CLEAN;
16775 /* Return a mode that ENTITY is assumed to be
16776 switched to at function entry. */
16778 static int
16779 ix86_mode_entry (int entity)
16781 switch (entity)
16783 case AVX_U128:
16784 return ix86_avx_u128_mode_entry ();
16785 case I387_TRUNC:
16786 case I387_FLOOR:
16787 case I387_CEIL:
16788 case I387_MASK_PM:
16789 return I387_CW_ANY;
16790 default:
16791 gcc_unreachable ();
16795 static int
16796 ix86_avx_u128_mode_exit (void)
16798 rtx reg = crtl->return_rtx;
16800 /* Exit mode is set to AVX_U128_DIRTY if there are
16801 256bit modes used in the function return register. */
16802 if (reg && ix86_check_avx256_register (reg))
16803 return AVX_U128_DIRTY;
16805 return AVX_U128_CLEAN;
16808 /* Return a mode that ENTITY is assumed to be
16809 switched to at function exit. */
16811 static int
16812 ix86_mode_exit (int entity)
16814 switch (entity)
16816 case AVX_U128:
16817 return ix86_avx_u128_mode_exit ();
16818 case I387_TRUNC:
16819 case I387_FLOOR:
16820 case I387_CEIL:
16821 case I387_MASK_PM:
16822 return I387_CW_ANY;
16823 default:
16824 gcc_unreachable ();
16828 static int
16829 ix86_mode_priority (int, int n)
16831 return n;
16834 /* Output code to initialize control word copies used by trunc?f?i and
16835 rounding patterns. The current control word is saved to a stack slot,
16836 a copy is adjusted for MODE, and the adjusted copy is stored in the
slot for MODE so it can later be loaded with fldcw. */
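/* Reference for the constants below (assumed standard x87 layout): the
   rounding control field is bits 11:10 of the control word (00 nearest,
   01 down, 10 up, 11 toward zero) and bit 5 is the precision exception
   mask.  For example, starting from the default control word 0x037f,
   OR-ing in 0x0c00 yields 0x0f7f, i.e. round-toward-zero for trunc.  */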
16838 static void
16839 emit_i387_cw_initialization (int mode)
16841 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16842 rtx new_mode;
16844 enum ix86_stack_slot slot;
16846 rtx reg = gen_reg_rtx (HImode);
16848 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16849 emit_move_insn (reg, copy_rtx (stored_mode));
16851 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16852 || optimize_insn_for_size_p ())
16854 switch (mode)
16856 case I387_CW_TRUNC:
16857 /* round toward zero (truncate) */
16858 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16859 slot = SLOT_CW_TRUNC;
16860 break;
16862 case I387_CW_FLOOR:
16863 /* round down toward -oo */
16864 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16865 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16866 slot = SLOT_CW_FLOOR;
16867 break;
16869 case I387_CW_CEIL:
16870 /* round up toward +oo */
16871 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16872 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16873 slot = SLOT_CW_CEIL;
16874 break;
16876 case I387_CW_MASK_PM:
16877 /* mask precision exception for nearbyint() */
16878 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16879 slot = SLOT_CW_MASK_PM;
16880 break;
16882 default:
16883 gcc_unreachable ();
16886 else
16888 switch (mode)
16890 case I387_CW_TRUNC:
16891 /* round toward zero (truncate) */
16892 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16893 slot = SLOT_CW_TRUNC;
16894 break;
16896 case I387_CW_FLOOR:
16897 /* round down toward -oo */
16898 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16899 slot = SLOT_CW_FLOOR;
16900 break;
16902 case I387_CW_CEIL:
16903 /* round up toward +oo */
16904 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16905 slot = SLOT_CW_CEIL;
16906 break;
16908 case I387_CW_MASK_PM:
16909 /* mask precision exception for nearbyint() */
16910 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16911 slot = SLOT_CW_MASK_PM;
16912 break;
16914 default:
16915 gcc_unreachable ();
16919 gcc_assert (slot < MAX_386_STACK_LOCALS);
16921 new_mode = assign_386_stack_local (HImode, slot);
16922 emit_move_insn (new_mode, reg);
16925 /* Emit vzeroupper. */
16927 void
16928 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16930 int i;
16932 /* Cancel automatic vzeroupper insertion if there are
16933 live call-saved SSE registers at the insertion point. */
16935 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16936 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16937 return;
16939 if (TARGET_64BIT)
16940 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16941 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16942 return;
16944 emit_insn (gen_avx_vzeroupper ());
16949 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE
16950 is the set of hard registers live at the point where the insn(s)
16951 are to be inserted. */
16953 static void
16954 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16955 HARD_REG_SET regs_live)
16957 switch (entity)
16959 case AVX_U128:
16960 if (mode == AVX_U128_CLEAN)
16961 ix86_avx_emit_vzeroupper (regs_live);
16962 break;
16963 case I387_TRUNC:
16964 case I387_FLOOR:
16965 case I387_CEIL:
16966 case I387_MASK_PM:
16967 if (mode != I387_CW_ANY
16968 && mode != I387_CW_UNINITIALIZED)
16969 emit_i387_cw_initialization (mode);
16970 break;
16971 default:
16972 gcc_unreachable ();
16976 /* Output code for INSN to convert a float to a signed int. OPERANDS
16977 are the insn operands. The output may be [HSD]Imode and the input
16978 operand may be [SDX]Fmode. */
16980 const char *
16981 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
16983 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16984 int dimode_p = GET_MODE (operands[0]) == DImode;
16985 int round_mode = get_attr_i387_cw (insn);
16987 /* Jump through a hoop or two for DImode, since the hardware has no
16988 non-popping instruction. We used to do this a different way, but
16989 that was somewhat fragile and broke with post-reload splitters. */
16990 if ((dimode_p || fisttp) && !stack_top_dies)
16991 output_asm_insn ("fld\t%y1", operands);
16993 gcc_assert (STACK_TOP_P (operands[1]));
16994 gcc_assert (MEM_P (operands[0]));
16995 gcc_assert (GET_MODE (operands[1]) != TFmode);
16997 if (fisttp)
16998 output_asm_insn ("fisttp%Z0\t%0", operands);
16999 else
17001 if (round_mode != I387_CW_ANY)
17002 output_asm_insn ("fldcw\t%3", operands);
17003 if (stack_top_dies || dimode_p)
17004 output_asm_insn ("fistp%Z0\t%0", operands);
17005 else
17006 output_asm_insn ("fist%Z0\t%0", operands);
17007 if (round_mode != I387_CW_ANY)
17008 output_asm_insn ("fldcw\t%2", operands);
17011 return "";
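/* Illustrative emitted sequence (hypothetical operands): for a DImode
   truncation without fisttp where the stack top does not die, this is
   roughly "fld %y1; fldcw %3; fistp%Z0 %0; fldcw %2", i.e. duplicate
   the value, switch to the truncating control word, do the popping
   integer store, and restore the original control word; %2/%3 are the
   stack slots prepared by emit_i387_cw_initialization.  */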
17014 /* Output code for x87 ffreep insn. The OPNO argument, which may only
17015 have the values zero or one, indicates the ffreep insn's operand
17016 from the OPERANDS array. */
17018 static const char *
17019 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
17021 if (TARGET_USE_FFREEP)
17022 #ifdef HAVE_AS_IX86_FFREEP
17023 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
17024 #else
17026 static char retval[32];
17027 int regno = REGNO (operands[opno]);
17029 gcc_assert (STACK_REGNO_P (regno));
17031 regno -= FIRST_STACK_REG;
17033 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
17034 return retval;
17036 #endif
17038 return opno ? "fstp\t%y1" : "fstp\t%y0";
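/* Encoding note (assumed standard x87 encoding): ffreep %st(i) is the
   two-byte opcode DF C0+i.  When the assembler does not know the
   mnemonic (no HAVE_AS_IX86_FFREEP), the code above emits it as raw
   data, e.g. ASM_SHORT "0xc1df" for %st(1), which stores the bytes
   0xdf 0xc1 on this little-endian target.  */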
17042 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
17043 should be used. UNORDERED_P is true when fucom should be used. */
17045 const char *
17046 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17048 int stack_top_dies;
17049 rtx cmp_op0, cmp_op1;
17050 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17052 if (eflags_p)
17054 cmp_op0 = operands[0];
17055 cmp_op1 = operands[1];
17057 else
17059 cmp_op0 = operands[1];
17060 cmp_op1 = operands[2];
17063 if (is_sse)
17065 if (GET_MODE (operands[0]) == SFmode)
17066 if (unordered_p)
17067 return "%vucomiss\t{%1, %0|%0, %1}";
17068 else
17069 return "%vcomiss\t{%1, %0|%0, %1}";
17070 else
17071 if (unordered_p)
17072 return "%vucomisd\t{%1, %0|%0, %1}";
17073 else
17074 return "%vcomisd\t{%1, %0|%0, %1}";
17077 gcc_assert (STACK_TOP_P (cmp_op0));
17079 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17081 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17083 if (stack_top_dies)
17085 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17086 return output_387_ffreep (operands, 1);
17088 else
17089 return "ftst\n\tfnstsw\t%0";
17092 if (STACK_REG_P (cmp_op1)
17093 && stack_top_dies
17094 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17095 && REGNO (cmp_op1) != FIRST_STACK_REG)
17097 /* If the top of the 387 stack dies, and the other operand
17098 is also a stack register that dies, then this must be a
17099 `fcompp' float compare. */
17101 if (eflags_p)
17103 /* There is no double popping fcomi variant. Fortunately,
17104 eflags is immune from the fstp's cc clobbering. */
17105 if (unordered_p)
17106 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17107 else
17108 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17109 return output_387_ffreep (operands, 0);
17111 else
17113 if (unordered_p)
17114 return "fucompp\n\tfnstsw\t%0";
17115 else
17116 return "fcompp\n\tfnstsw\t%0";
17119 else
17121 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
17123 static const char * const alt[16] =
17125 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17126 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17127 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17128 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17130 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17131 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17132 NULL,
17133 NULL,
17135 "fcomi\t{%y1, %0|%0, %y1}",
17136 "fcomip\t{%y1, %0|%0, %y1}",
17137 "fucomi\t{%y1, %0|%0, %y1}",
17138 "fucomip\t{%y1, %0|%0, %y1}",
17140 NULL,
17141 NULL,
17142 NULL,
17143 NULL
17146 int mask;
17147 const char *ret;
17149 mask = eflags_p << 3;
17150 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17151 mask |= unordered_p << 1;
17152 mask |= stack_top_dies;
17154 gcc_assert (mask < 16);
17155 ret = alt[mask];
17156 gcc_assert (ret);
17158 return ret;
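/* Worked example for the mask above (illustrative): a dying stack top,
   an unordered compare with eflags output and a non-integer operand
   gives mask = (1<<3) | (0<<2) | (1<<1) | 1 = 11, selecting
   "fucomip\t{%y1, %0|%0, %y1}" from the table.  */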
17162 void
17163 ix86_output_addr_vec_elt (FILE *file, int value)
17165 const char *directive = ASM_LONG;
17167 #ifdef ASM_QUAD
17168 if (TARGET_LP64)
17169 directive = ASM_QUAD;
17170 #else
17171 gcc_assert (!TARGET_64BIT);
17172 #endif
17174 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17177 void
17178 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17180 const char *directive = ASM_LONG;
17182 #ifdef ASM_QUAD
17183 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17184 directive = ASM_QUAD;
17185 #else
17186 gcc_assert (!TARGET_64BIT);
17187 #endif
17188 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17189 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17190 fprintf (file, "%s%s%d-%s%d\n",
17191 directive, LPREFIX, value, LPREFIX, rel);
17192 else if (HAVE_AS_GOTOFF_IN_DATA)
17193 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17194 #if TARGET_MACHO
17195 else if (TARGET_MACHO)
17197 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17198 machopic_output_function_base_name (file);
17199 putc ('\n', file);
17201 #endif
17202 else
17203 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17204 GOT_SYMBOL_NAME, LPREFIX, value);
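/* Illustrative output (label numbers hypothetical): on 64-bit targets
   this prints a label difference such as ".long .L5-.L2" (or ".quad"
   when the case vector is DImode), while 32-bit PIC code with GOTOFF
   support in data prints ".long .L5@GOTOFF".  */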
17207 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17208 for the target. */
17210 void
17211 ix86_expand_clear (rtx dest)
17213 rtx tmp;
17215 /* We play register width games, which are only valid after reload. */
17216 gcc_assert (reload_completed);
17218 /* Avoid HImode and its attendant prefix byte. */
17219 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17220 dest = gen_rtx_REG (SImode, REGNO (dest));
17221 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
17223 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17225 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17226 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17229 emit_insn (tmp);
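/* Illustrative result (hypothetical register): clearing a 32-bit
   register normally becomes "xorl %eax, %eax" with a flags clobber
   attached; the 5-byte "movl $0, %eax" form is used only when
   TARGET_USE_MOV0 is set and we are not optimizing for size, since it
   preserves the flags.  */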
17232 /* X is an unchanging MEM. If it is a constant pool reference, return
17233 the constant pool rtx, else NULL. */
17236 maybe_get_pool_constant (rtx x)
17238 x = ix86_delegitimize_address (XEXP (x, 0));
17240 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17241 return get_pool_constant (x);
17243 return NULL_RTX;
17246 void
17247 ix86_expand_move (machine_mode mode, rtx operands[])
17249 rtx op0, op1;
17250 enum tls_model model;
17252 op0 = operands[0];
17253 op1 = operands[1];
17255 if (GET_CODE (op1) == SYMBOL_REF)
17257 rtx tmp;
17259 model = SYMBOL_REF_TLS_MODEL (op1);
17260 if (model)
17262 op1 = legitimize_tls_address (op1, model, true);
17263 op1 = force_operand (op1, op0);
17264 if (op1 == op0)
17265 return;
17266 op1 = convert_to_mode (mode, op1, 1);
17268 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17269 op1 = tmp;
17271 else if (GET_CODE (op1) == CONST
17272 && GET_CODE (XEXP (op1, 0)) == PLUS
17273 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17275 rtx addend = XEXP (XEXP (op1, 0), 1);
17276 rtx symbol = XEXP (XEXP (op1, 0), 0);
17277 rtx tmp;
17279 model = SYMBOL_REF_TLS_MODEL (symbol);
17280 if (model)
17281 tmp = legitimize_tls_address (symbol, model, true);
17282 else
17283 tmp = legitimize_pe_coff_symbol (symbol, true);
17285 if (tmp)
17287 tmp = force_operand (tmp, NULL);
17288 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17289 op0, 1, OPTAB_DIRECT);
17290 if (tmp == op0)
17291 return;
17292 op1 = convert_to_mode (mode, tmp, 1);
17296 if ((flag_pic || MACHOPIC_INDIRECT)
17297 && symbolic_operand (op1, mode))
17299 if (TARGET_MACHO && !TARGET_64BIT)
17301 #if TARGET_MACHO
17302 /* dynamic-no-pic */
17303 if (MACHOPIC_INDIRECT)
17305 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
17306 ? op0 : gen_reg_rtx (Pmode);
17307 op1 = machopic_indirect_data_reference (op1, temp);
17308 if (MACHOPIC_PURE)
17309 op1 = machopic_legitimize_pic_address (op1, mode,
17310 temp == op1 ? 0 : temp);
17312 if (op0 != op1 && GET_CODE (op0) != MEM)
17314 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
17315 emit_insn (insn);
17316 return;
17318 if (GET_CODE (op0) == MEM)
17319 op1 = force_reg (Pmode, op1);
17320 else
17322 rtx temp = op0;
17323 if (GET_CODE (temp) != REG)
17324 temp = gen_reg_rtx (Pmode);
17325 temp = legitimize_pic_address (op1, temp);
17326 if (temp == op0)
17327 return;
17328 op1 = temp;
17330 /* dynamic-no-pic */
17331 #endif
17333 else
17335 if (MEM_P (op0))
17336 op1 = force_reg (mode, op1);
17337 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17339 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17340 op1 = legitimize_pic_address (op1, reg);
17341 if (op0 == op1)
17342 return;
17343 op1 = convert_to_mode (mode, op1, 1);
17347 else
17349 if (MEM_P (op0)
17350 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17351 || !push_operand (op0, mode))
17352 && MEM_P (op1))
17353 op1 = force_reg (mode, op1);
17355 if (push_operand (op0, mode)
17356 && ! general_no_elim_operand (op1, mode))
17357 op1 = copy_to_mode_reg (mode, op1);
17359 /* Force large constants in 64bit compilation into a register
17360 to get them CSEed. */
17361 if (can_create_pseudo_p ()
17362 && (mode == DImode) && TARGET_64BIT
17363 && immediate_operand (op1, mode)
17364 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17365 && !register_operand (op0, mode)
17366 && optimize)
17367 op1 = copy_to_mode_reg (mode, op1);
17369 if (can_create_pseudo_p ()
17370 && CONST_DOUBLE_P (op1))
17372 /* If we are loading a floating point constant to a register,
17373 force the value to memory now, since we'll get better code
17374 out the back end. */
17376 op1 = validize_mem (force_const_mem (mode, op1));
17377 if (!register_operand (op0, mode))
17379 rtx temp = gen_reg_rtx (mode);
17380 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
17381 emit_move_insn (op0, temp);
17382 return;
17387 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17390 void
17391 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17393 rtx op0 = operands[0], op1 = operands[1];
17394 unsigned int align = GET_MODE_ALIGNMENT (mode);
17396 if (push_operand (op0, VOIDmode))
17397 op0 = emit_move_resolve_push (mode, op0);
17399 /* Force constants other than zero into memory. We do not know how
17400 the instructions used to build constants modify the upper 64 bits
17401 of the register; once we have that information, we may be able
17402 to handle some of them more efficiently. */
17403 if (can_create_pseudo_p ()
17404 && register_operand (op0, mode)
17405 && (CONSTANT_P (op1)
17406 || (GET_CODE (op1) == SUBREG
17407 && CONSTANT_P (SUBREG_REG (op1))))
17408 && !standard_sse_constant_p (op1))
17409 op1 = validize_mem (force_const_mem (mode, op1));
17411 /* We need to check memory alignment for SSE mode since attribute
17412 can make operands unaligned. */
17413 if (can_create_pseudo_p ()
17414 && SSE_REG_MODE_P (mode)
17415 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17416 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17418 rtx tmp[2];
17420 /* ix86_expand_vector_move_misalign() does not like constants ... */
17421 if (CONSTANT_P (op1)
17422 || (GET_CODE (op1) == SUBREG
17423 && CONSTANT_P (SUBREG_REG (op1))))
17424 op1 = validize_mem (force_const_mem (mode, op1));
17426 /* ... nor both arguments in memory. */
17427 if (!register_operand (op0, mode)
17428 && !register_operand (op1, mode))
17429 op1 = force_reg (mode, op1);
17431 tmp[0] = op0; tmp[1] = op1;
17432 ix86_expand_vector_move_misalign (mode, tmp);
17433 return;
17436 /* Make operand1 a register if it isn't already. */
17437 if (can_create_pseudo_p ()
17438 && !register_operand (op0, mode)
17439 && !register_operand (op1, mode))
17441 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17442 return;
17445 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17448 /* Split 32-byte AVX unaligned load and store if needed. */
17450 static void
17451 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17453 rtx m;
17454 rtx (*extract) (rtx, rtx, rtx);
17455 rtx (*load_unaligned) (rtx, rtx);
17456 rtx (*store_unaligned) (rtx, rtx);
17457 machine_mode mode;
17459 switch (GET_MODE (op0))
17461 default:
17462 gcc_unreachable ();
17463 case V32QImode:
17464 extract = gen_avx_vextractf128v32qi;
17465 load_unaligned = gen_avx_loaddquv32qi;
17466 store_unaligned = gen_avx_storedquv32qi;
17467 mode = V16QImode;
17468 break;
17469 case V8SFmode:
17470 extract = gen_avx_vextractf128v8sf;
17471 load_unaligned = gen_avx_loadups256;
17472 store_unaligned = gen_avx_storeups256;
17473 mode = V4SFmode;
17474 break;
17475 case V4DFmode:
17476 extract = gen_avx_vextractf128v4df;
17477 load_unaligned = gen_avx_loadupd256;
17478 store_unaligned = gen_avx_storeupd256;
17479 mode = V2DFmode;
17480 break;
17483 if (MEM_P (op1))
17485 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
17486 && optimize_insn_for_speed_p ())
17488 rtx r = gen_reg_rtx (mode);
17489 m = adjust_address (op1, mode, 0);
17490 emit_move_insn (r, m);
17491 m = adjust_address (op1, mode, 16);
17492 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17493 emit_move_insn (op0, r);
17495 /* Normal *mov<mode>_internal pattern will handle
17496 unaligned loads just fine if misaligned_operand
17497 is true, and without the UNSPEC it can be combined
17498 with arithmetic instructions. */
17499 else if (misaligned_operand (op1, GET_MODE (op1)))
17500 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17501 else
17502 emit_insn (load_unaligned (op0, op1));
17504 else if (MEM_P (op0))
17506 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
17507 && optimize_insn_for_speed_p ())
17509 m = adjust_address (op0, mode, 0);
17510 emit_insn (extract (m, op1, const0_rtx));
17511 m = adjust_address (op0, mode, 16);
17512 emit_insn (extract (m, op1, const1_rtx));
17514 else
17515 emit_insn (store_unaligned (op0, op1));
17517 else
17518 gcc_unreachable ();
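/* Conceptual sketch (hypothetical operands): a 32-byte unaligned store
   of %ymm0 to (%rax) is split into two 16-byte pieces via the extract
   patterns with selectors 0 and 1 into (%rax) and 16(%rax); an
   unaligned load is rebuilt as a VEC_CONCAT of two 16-byte loads.  */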
17521 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17522 straight to ix86_expand_vector_move. */
17523 /* Code generation for scalar reg-reg moves of single and double precision data:
17524 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17525 movaps reg, reg
17526 else
17527 movss reg, reg
17528 if (x86_sse_partial_reg_dependency == true)
17529 movapd reg, reg
17530 else
17531 movsd reg, reg
17533 Code generation for scalar loads of double precision data:
17534 if (x86_sse_split_regs == true)
17535 movlpd mem, reg (gas syntax)
17536 else
17537 movsd mem, reg
17539 Code generation for unaligned packed loads of single precision data
17540 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17541 if (x86_sse_unaligned_move_optimal)
17542 movups mem, reg
17544 if (x86_sse_partial_reg_dependency == true)
17546 xorps reg, reg
17547 movlps mem, reg
17548 movhps mem+8, reg
17550 else
17552 movlps mem, reg
17553 movhps mem+8, reg
17556 Code generation for unaligned packed loads of double precision data
17557 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17558 if (x86_sse_unaligned_move_optimal)
17559 movupd mem, reg
17561 if (x86_sse_split_regs == true)
17563 movlpd mem, reg
17564 movhpd mem+8, reg
17566 else
17568 movsd mem, reg
17569 movhpd mem+8, reg
17573 void
17574 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17576 rtx op0, op1, orig_op0 = NULL_RTX, m;
17577 rtx (*load_unaligned) (rtx, rtx);
17578 rtx (*store_unaligned) (rtx, rtx);
17580 op0 = operands[0];
17581 op1 = operands[1];
17583 if (GET_MODE_SIZE (mode) == 64)
17585 switch (GET_MODE_CLASS (mode))
17587 case MODE_VECTOR_INT:
17588 case MODE_INT:
17589 if (GET_MODE (op0) != V16SImode)
17591 if (!MEM_P (op0))
17593 orig_op0 = op0;
17594 op0 = gen_reg_rtx (V16SImode);
17596 else
17597 op0 = gen_lowpart (V16SImode, op0);
17599 op1 = gen_lowpart (V16SImode, op1);
17600 /* FALLTHRU */
17602 case MODE_VECTOR_FLOAT:
17603 switch (GET_MODE (op0))
17605 default:
17606 gcc_unreachable ();
17607 case V16SImode:
17608 load_unaligned = gen_avx512f_loaddquv16si;
17609 store_unaligned = gen_avx512f_storedquv16si;
17610 break;
17611 case V16SFmode:
17612 load_unaligned = gen_avx512f_loadups512;
17613 store_unaligned = gen_avx512f_storeups512;
17614 break;
17615 case V8DFmode:
17616 load_unaligned = gen_avx512f_loadupd512;
17617 store_unaligned = gen_avx512f_storeupd512;
17618 break;
17621 if (MEM_P (op1))
17622 emit_insn (load_unaligned (op0, op1));
17623 else if (MEM_P (op0))
17624 emit_insn (store_unaligned (op0, op1));
17625 else
17626 gcc_unreachable ();
17627 if (orig_op0)
17628 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17629 break;
17631 default:
17632 gcc_unreachable ();
17635 return;
17638 if (TARGET_AVX
17639 && GET_MODE_SIZE (mode) == 32)
17641 switch (GET_MODE_CLASS (mode))
17643 case MODE_VECTOR_INT:
17644 case MODE_INT:
17645 if (GET_MODE (op0) != V32QImode)
17647 if (!MEM_P (op0))
17649 orig_op0 = op0;
17650 op0 = gen_reg_rtx (V32QImode);
17652 else
17653 op0 = gen_lowpart (V32QImode, op0);
17655 op1 = gen_lowpart (V32QImode, op1);
17656 /* FALLTHRU */
17658 case MODE_VECTOR_FLOAT:
17659 ix86_avx256_split_vector_move_misalign (op0, op1);
17660 if (orig_op0)
17661 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17662 break;
17664 default:
17665 gcc_unreachable ();
17668 return;
17671 if (MEM_P (op1))
17673 /* Normal *mov<mode>_internal pattern will handle
17674 unaligned loads just fine if misaligned_operand
17675 is true, and without the UNSPEC it can be combined
17676 with arithmetic instructions. */
17677 if (TARGET_AVX
17678 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17679 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17680 && misaligned_operand (op1, GET_MODE (op1)))
17681 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17682 /* ??? If we have typed data, then it would appear that using
17683 movdqu is the only way to get unaligned data loaded with
17684 integer type. */
17685 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17687 if (GET_MODE (op0) != V16QImode)
17689 orig_op0 = op0;
17690 op0 = gen_reg_rtx (V16QImode);
17692 op1 = gen_lowpart (V16QImode, op1);
17693 /* We will eventually emit movups based on insn attributes. */
17694 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17695 if (orig_op0)
17696 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17698 else if (TARGET_SSE2 && mode == V2DFmode)
17700 rtx zero;
17702 if (TARGET_AVX
17703 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17704 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17705 || optimize_insn_for_size_p ())
17707 /* We will eventually emit movups based on insn attributes. */
17708 emit_insn (gen_sse2_loadupd (op0, op1));
17709 return;
17712 /* When SSE registers are split into halves, we can avoid
17713 writing to the top half twice. */
17714 if (TARGET_SSE_SPLIT_REGS)
17716 emit_clobber (op0);
17717 zero = op0;
17719 else
17721 /* ??? Not sure about the best option for the Intel chips.
17722 The following would seem to satisfy; the register is
17723 entirely cleared, breaking the dependency chain. We
17724 then store to the upper half, with a dependency depth
17725 of one. A rumor has it that Intel recommends two movsd
17726 followed by an unpacklpd, but this is unconfirmed. And
17727 given that the dependency depth of the unpacklpd would
17728 still be one, I'm not sure why this would be better. */
17729 zero = CONST0_RTX (V2DFmode);
17732 m = adjust_address (op1, DFmode, 0);
17733 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17734 m = adjust_address (op1, DFmode, 8);
17735 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17737 else
17739 rtx t;
17741 if (TARGET_AVX
17742 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17743 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17744 || optimize_insn_for_size_p ())
17746 if (GET_MODE (op0) != V4SFmode)
17748 orig_op0 = op0;
17749 op0 = gen_reg_rtx (V4SFmode);
17751 op1 = gen_lowpart (V4SFmode, op1);
17752 emit_insn (gen_sse_loadups (op0, op1));
17753 if (orig_op0)
17754 emit_move_insn (orig_op0,
17755 gen_lowpart (GET_MODE (orig_op0), op0));
17756 return;
17759 if (mode != V4SFmode)
17760 t = gen_reg_rtx (V4SFmode);
17761 else
17762 t = op0;
17764 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17765 emit_move_insn (t, CONST0_RTX (V4SFmode));
17766 else
17767 emit_clobber (t);
17769 m = adjust_address (op1, V2SFmode, 0);
17770 emit_insn (gen_sse_loadlps (t, t, m));
17771 m = adjust_address (op1, V2SFmode, 8);
17772 emit_insn (gen_sse_loadhps (t, t, m));
17773 if (mode != V4SFmode)
17774 emit_move_insn (op0, gen_lowpart (mode, t));
17777 else if (MEM_P (op0))
17779 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17781 op0 = gen_lowpart (V16QImode, op0);
17782 op1 = gen_lowpart (V16QImode, op1);
17783 /* We will eventually emit movups based on insn attributes. */
17784 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17786 else if (TARGET_SSE2 && mode == V2DFmode)
17788 if (TARGET_AVX
17789 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17790 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17791 || optimize_insn_for_size_p ())
17792 /* We will eventually emit movups based on insn attributes. */
17793 emit_insn (gen_sse2_storeupd (op0, op1));
17794 else
17796 m = adjust_address (op0, DFmode, 0);
17797 emit_insn (gen_sse2_storelpd (m, op1));
17798 m = adjust_address (op0, DFmode, 8);
17799 emit_insn (gen_sse2_storehpd (m, op1));
17802 else
17804 if (mode != V4SFmode)
17805 op1 = gen_lowpart (V4SFmode, op1);
17807 if (TARGET_AVX
17808 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17809 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17810 || optimize_insn_for_size_p ())
17812 op0 = gen_lowpart (V4SFmode, op0);
17813 emit_insn (gen_sse_storeups (op0, op1));
17815 else
17817 m = adjust_address (op0, V2SFmode, 0);
17818 emit_insn (gen_sse_storelps (m, op1));
17819 m = adjust_address (op0, V2SFmode, 8);
17820 emit_insn (gen_sse_storehps (m, op1));
17824 else
17825 gcc_unreachable ();
17828 /* Helper function of ix86_fixup_binary_operands to canonicalize
17829 operand order. Returns true if the operands should be swapped. */
17831 static bool
17832 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17833 rtx operands[])
17835 rtx dst = operands[0];
17836 rtx src1 = operands[1];
17837 rtx src2 = operands[2];
17839 /* If the operation is not commutative, we can't do anything. */
17840 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17841 return false;
17843 /* Highest priority is that src1 should match dst. */
17844 if (rtx_equal_p (dst, src1))
17845 return false;
17846 if (rtx_equal_p (dst, src2))
17847 return true;
17849 /* Next highest priority is that immediate constants come second. */
17850 if (immediate_operand (src2, mode))
17851 return false;
17852 if (immediate_operand (src1, mode))
17853 return true;
17855 /* Lowest priority is that memory references should come second. */
17856 if (MEM_P (src2))
17857 return false;
17858 if (MEM_P (src1))
17859 return true;
17861 return false;
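/* Illustrative example (hypothetical pseudo-RTL): for a commutative
   PLUS written as a = b + a, dst equals src2, so the operands are
   swapped to make src1 match the destination and fit the two-address
   "add b, a" form of the machine instruction.  */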
17865 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17866 destination to use for the operation. If different from the true
17867 destination in operands[0], a copy operation will be required. */
17870 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
17871 rtx operands[])
17873 rtx dst = operands[0];
17874 rtx src1 = operands[1];
17875 rtx src2 = operands[2];
17877 /* Canonicalize operand order. */
17878 if (ix86_swap_binary_operands_p (code, mode, operands))
17880 /* It is invalid to swap operands of different modes. */
17881 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17883 std::swap (src1, src2);
17886 /* Both source operands cannot be in memory. */
17887 if (MEM_P (src1) && MEM_P (src2))
17889 /* Optimization: Only read from memory once. */
17890 if (rtx_equal_p (src1, src2))
17892 src2 = force_reg (mode, src2);
17893 src1 = src2;
17895 else if (rtx_equal_p (dst, src1))
17896 src2 = force_reg (mode, src2);
17897 else
17898 src1 = force_reg (mode, src1);
17901 /* If the destination is memory, and we do not have matching source
17902 operands, do things in registers. */
17903 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17904 dst = gen_reg_rtx (mode);
17906 /* Source 1 cannot be a constant. */
17907 if (CONSTANT_P (src1))
17908 src1 = force_reg (mode, src1);
17910 /* Source 1 cannot be a non-matching memory. */
17911 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17912 src1 = force_reg (mode, src1);
17914 /* Improve address combine. */
17915 if (code == PLUS
17916 && GET_MODE_CLASS (mode) == MODE_INT
17917 && MEM_P (src2))
17918 src2 = force_reg (mode, src2);
17920 operands[1] = src1;
17921 operands[2] = src2;
17922 return dst;
17925 /* Similarly, but assume that the destination has already been
17926 set up properly. */
17928 void
17929 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17930 machine_mode mode, rtx operands[])
17932 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17933 gcc_assert (dst == operands[0]);
17936 /* Attempt to expand a binary operator. Make the expansion closer to the
17937 actual machine than just general_operand, which would allow 3 separate
17938 memory references (one output, two input) in a single insn. */
17940 void
17941 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
17942 rtx operands[])
17944 rtx src1, src2, dst, op, clob;
17946 dst = ix86_fixup_binary_operands (code, mode, operands);
17947 src1 = operands[1];
17948 src2 = operands[2];
17950 /* Emit the instruction. */
17952 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17954 if (reload_completed
17955 && code == PLUS
17956 && !rtx_equal_p (dst, src1))
17958 /* This is going to be an LEA; avoid splitting it later. */
17959 emit_insn (op);
17961 else
17963 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17964 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17967 /* Fix up the destination if needed. */
17968 if (dst != operands[0])
17969 emit_move_insn (operands[0], dst);
17972 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17973 the given OPERANDS. */
17975 void
17976 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
17977 rtx operands[])
17979 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17980 if (GET_CODE (operands[1]) == SUBREG)
17982 op1 = operands[1];
17983 op2 = operands[2];
17985 else if (GET_CODE (operands[2]) == SUBREG)
17987 op1 = operands[2];
17988 op2 = operands[1];
17990 /* Optimize (__m128i) d | (__m128i) e and similar code
17991 when d and e are float vectors into a float vector logical
17992 insn. In C/C++, without using intrinsics, there is no other way
17993 to express a vector logical operation on float vectors than
17994 to cast them temporarily to integer vectors. */
17995 if (op1
17996 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17997 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17998 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17999 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
18000 && SUBREG_BYTE (op1) == 0
18001 && (GET_CODE (op2) == CONST_VECTOR
18002 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
18003 && SUBREG_BYTE (op2) == 0))
18004 && can_create_pseudo_p ())
18006 rtx dst;
18007 switch (GET_MODE (SUBREG_REG (op1)))
18009 case V4SFmode:
18010 case V8SFmode:
18011 case V16SFmode:
18012 case V2DFmode:
18013 case V4DFmode:
18014 case V8DFmode:
18015 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
18016 if (GET_CODE (op2) == CONST_VECTOR)
18018 op2 = gen_lowpart (GET_MODE (dst), op2);
18019 op2 = force_reg (GET_MODE (dst), op2);
18021 else
18023 op1 = operands[1];
18024 op2 = SUBREG_REG (operands[2]);
18025 if (!nonimmediate_operand (op2, GET_MODE (dst)))
18026 op2 = force_reg (GET_MODE (dst), op2);
18028 op1 = SUBREG_REG (op1);
18029 if (!nonimmediate_operand (op1, GET_MODE (dst)))
18030 op1 = force_reg (GET_MODE (dst), op1);
18031 emit_insn (gen_rtx_SET (VOIDmode, dst,
18032 gen_rtx_fmt_ee (code, GET_MODE (dst),
18033 op1, op2)));
18034 emit_move_insn (operands[0], gen_lowpart (mode, dst));
18035 return;
18036 default:
18037 break;
18040 if (!nonimmediate_operand (operands[1], mode))
18041 operands[1] = force_reg (mode, operands[1]);
18042 if (!nonimmediate_operand (operands[2], mode))
18043 operands[2] = force_reg (mode, operands[2]);
18044 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18045 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18046 gen_rtx_fmt_ee (code, mode, operands[1],
18047 operands[2])));
18050 /* Return TRUE or FALSE depending on whether the binary operator meets the
18051 appropriate constraints. */
18053 bool
18054 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18055 rtx operands[3])
18057 rtx dst = operands[0];
18058 rtx src1 = operands[1];
18059 rtx src2 = operands[2];
18061 /* Both source operands cannot be in memory. */
18062 if (MEM_P (src1) && MEM_P (src2))
18063 return false;
18065 /* Canonicalize operand order for commutative operators. */
18066 if (ix86_swap_binary_operands_p (code, mode, operands))
18067 std::swap (src1, src2);
18069 /* If the destination is memory, we must have a matching source operand. */
18070 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18071 return false;
18073 /* Source 1 cannot be a constant. */
18074 if (CONSTANT_P (src1))
18075 return false;
18077 /* Source 1 cannot be a non-matching memory. */
18078 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18079 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18080 return (code == AND
18081 && (mode == HImode
18082 || mode == SImode
18083 || (TARGET_64BIT && mode == DImode))
18084 && satisfies_constraint_L (src2));
18086 return true;
18089 /* Attempt to expand a unary operator. Make the expansion closer to the
18090 actual machine than just general_operand, which would allow 2 separate
18091 memory references (one output, one input) in a single insn. */
18093 void
18094 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18095 rtx operands[])
18097 bool matching_memory = false;
18098 rtx src, dst, op, clob;
18100 dst = operands[0];
18101 src = operands[1];
18103 /* If the destination is memory, and we do not have matching source
18104 operands, do things in registers. */
18105 if (MEM_P (dst))
18107 if (rtx_equal_p (dst, src))
18108 matching_memory = true;
18109 else
18110 dst = gen_reg_rtx (mode);
18113 /* When source operand is memory, destination must match. */
18114 if (MEM_P (src) && !matching_memory)
18115 src = force_reg (mode, src);
18117 /* Emit the instruction. */
18119 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
18121 if (code == NOT)
18122 emit_insn (op);
18123 else
18125 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18126 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18129 /* Fix up the destination if needed. */
18130 if (dst != operands[0])
18131 emit_move_insn (operands[0], dst);
18134 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18135 divisor are within the range [0-255]. */
18137 void
18138 ix86_split_idivmod (machine_mode mode, rtx operands[],
18139 bool signed_p)
18141 rtx_code_label *end_label, *qimode_label;
18142 rtx insn, div, mod;
18143 rtx scratch, tmp0, tmp1, tmp2;
18144 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18145 rtx (*gen_zero_extend) (rtx, rtx);
18146 rtx (*gen_test_ccno_1) (rtx, rtx);
18148 switch (mode)
18150 case SImode:
18151 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18152 gen_test_ccno_1 = gen_testsi_ccno_1;
18153 gen_zero_extend = gen_zero_extendqisi2;
18154 break;
18155 case DImode:
18156 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18157 gen_test_ccno_1 = gen_testdi_ccno_1;
18158 gen_zero_extend = gen_zero_extendqidi2;
18159 break;
18160 default:
18161 gcc_unreachable ();
18164 end_label = gen_label_rtx ();
18165 qimode_label = gen_label_rtx ();
18167 scratch = gen_reg_rtx (mode);
18169 /* Use 8bit unsigned divmod if dividend and divisor are within
18170 the range [0-255]. */
18171 emit_move_insn (scratch, operands[2]);
18172 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18173 scratch, 1, OPTAB_DIRECT);
18174 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18175 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18176 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18177 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18178 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18179 pc_rtx);
18180 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
18181 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18182 JUMP_LABEL (insn) = qimode_label;
18184 /* Generate the original signed/unsigned divmod. */
18185 div = gen_divmod4_1 (operands[0], operands[1],
18186 operands[2], operands[3]);
18187 emit_insn (div);
18189 /* Branch to the end. */
18190 emit_jump_insn (gen_jump (end_label));
18191 emit_barrier ();
18193 /* Generate 8bit unsigned divide. */
18194 emit_label (qimode_label);
18195 /* Don't use operands[0] for result of 8bit divide since not all
18196 registers support QImode ZERO_EXTRACT. */
18197 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18198 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18199 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18200 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18202 if (signed_p)
18204 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18205 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18207 else
18209 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18210 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18213 /* Extract remainder from AH. */
18214 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18215 if (REG_P (operands[1]))
18216 insn = emit_move_insn (operands[1], tmp1);
18217 else
18219 /* Need a new scratch register since the old one has result
18220 of 8bit divide. */
18221 scratch = gen_reg_rtx (mode);
18222 emit_move_insn (scratch, tmp1);
18223 insn = emit_move_insn (operands[1], scratch);
18225 set_unique_reg_note (insn, REG_EQUAL, mod);
18227 /* Zero extend quotient from AL. */
18228 tmp1 = gen_lowpart (QImode, tmp0);
18229 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18230 set_unique_reg_note (insn, REG_EQUAL, div);
18232 emit_label (end_label);
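/* Sketch of the generated control flow (illustrative, unsigned SImode,
   hypothetical labels):

       orl     divisor, scratch       ; scratch = dividend | divisor
       testl   $-0x100, scratch
       je      .Lqimode                ; both values fit in 8 bits
       <full 32-bit divide>
       jmp     .Lend
   .Lqimode:
       <8-bit divide via udivmodhiqi3> ; quotient in AL, remainder in AH
   .Lend:                                                              */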
18235 #define LEA_MAX_STALL (3)
18236 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18238 /* Increase given DISTANCE in half-cycles according to
18239 dependencies between PREV and NEXT instructions.
18240 Add 1 half-cycle if there is no dependency and
18241 go to the next cycle if there is some dependency. */
18243 static unsigned int
18244 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18246 df_ref def, use;
18248 if (!prev || !next)
18249 return distance + (distance & 1) + 2;
18251 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18252 return distance + 1;
18254 FOR_EACH_INSN_USE (use, next)
18255 FOR_EACH_INSN_DEF (def, prev)
18256 if (!DF_REF_IS_ARTIFICIAL (def)
18257 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18258 return distance + (distance & 1) + 2;
18260 return distance + 1;
18263 /* Function checks if instruction INSN defines register number
18264 REGNO1 or REGNO2. */
18266 static bool
18267 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18268 rtx_insn *insn)
18270 df_ref def;
18272 FOR_EACH_INSN_DEF (def, insn)
18273 if (DF_REF_REG_DEF_P (def)
18274 && !DF_REF_IS_ARTIFICIAL (def)
18275 && (regno1 == DF_REF_REGNO (def)
18276 || regno2 == DF_REF_REGNO (def)))
18277 return true;
18279 return false;
18282 /* Function checks if instruction INSN uses register number
18283 REGNO as a part of address expression. */
18285 static bool
18286 insn_uses_reg_mem (unsigned int regno, rtx insn)
18288 df_ref use;
18290 FOR_EACH_INSN_USE (use, insn)
18291 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18292 return true;
18294 return false;
18297 /* Search backward for non-agu definition of register number REGNO1
18298 or register number REGNO2 in basic block starting from instruction
18299 START up to head of basic block or instruction INSN.
18301 Set *FOUND to true if a definition was found and to false
18302 otherwise.
18304 Distance in half-cycles between START and found instruction or head
18305 of BB is added to DISTANCE and returned. */
18307 static int
18308 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18309 rtx_insn *insn, int distance,
18310 rtx_insn *start, bool *found)
18312 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18313 rtx_insn *prev = start;
18314 rtx_insn *next = NULL;
18316 *found = false;
18318 while (prev
18319 && prev != insn
18320 && distance < LEA_SEARCH_THRESHOLD)
18322 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18324 distance = increase_distance (prev, next, distance);
18325 if (insn_defines_reg (regno1, regno2, prev))
18327 if (recog_memoized (prev) < 0
18328 || get_attr_type (prev) != TYPE_LEA)
18330 *found = true;
18331 return distance;
18335 next = prev;
18337 if (prev == BB_HEAD (bb))
18338 break;
18340 prev = PREV_INSN (prev);
18343 return distance;
18346 /* Search backward for non-agu definition of register number REGNO1
18347 or register number REGNO2 in INSN's basic block until
18348 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18349 2. Reach neighbour BBs boundary, or
18350 3. Reach agu definition.
18351 Returns the distance between the non-agu definition point and INSN.
18352 If no definition point, returns -1. */
18354 static int
18355 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18356 rtx_insn *insn)
18358 basic_block bb = BLOCK_FOR_INSN (insn);
18359 int distance = 0;
18360 bool found = false;
18362 if (insn != BB_HEAD (bb))
18363 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18364 distance, PREV_INSN (insn),
18365 &found);
18367 if (!found && distance < LEA_SEARCH_THRESHOLD)
18369 edge e;
18370 edge_iterator ei;
18371 bool simple_loop = false;
18373 FOR_EACH_EDGE (e, ei, bb->preds)
18374 if (e->src == bb)
18376 simple_loop = true;
18377 break;
18380 if (simple_loop)
18381 distance = distance_non_agu_define_in_bb (regno1, regno2,
18382 insn, distance,
18383 BB_END (bb), &found);
18384 else
18386 int shortest_dist = -1;
18387 bool found_in_bb = false;
18389 FOR_EACH_EDGE (e, ei, bb->preds)
18391 int bb_dist
18392 = distance_non_agu_define_in_bb (regno1, regno2,
18393 insn, distance,
18394 BB_END (e->src),
18395 &found_in_bb);
18396 if (found_in_bb)
18398 if (shortest_dist < 0)
18399 shortest_dist = bb_dist;
18400 else if (bb_dist > 0)
18401 shortest_dist = MIN (bb_dist, shortest_dist);
18403 found = true;
18407 distance = shortest_dist;
18411 /* get_attr_type may modify recog data. We want to make sure
18412 that recog data is valid for instruction INSN, on which
18413 distance_non_agu_define is called. INSN is unchanged here. */
18414 extract_insn_cached (insn);
18416 if (!found)
18417 return -1;
18419 return distance >> 1;
18422 /* Return the distance in half-cycles between INSN and the next
18423 insn that uses register number REGNO in a memory address, added
18424 to DISTANCE. Return -1 if REGNO is set.
18426 Set *FOUND to true if a use of the register was found and to
18427 false otherwise.
18428 Set *REDEFINED to true if a redefinition of the register was
18429 found and to false otherwise. */
18431 static int
18432 distance_agu_use_in_bb (unsigned int regno,
18433 rtx_insn *insn, int distance, rtx_insn *start,
18434 bool *found, bool *redefined)
18436 basic_block bb = NULL;
18437 rtx_insn *next = start;
18438 rtx_insn *prev = NULL;
18440 *found = false;
18441 *redefined = false;
18443 if (start != NULL_RTX)
18445 bb = BLOCK_FOR_INSN (start);
18446 if (start != BB_HEAD (bb))
18447 /* If insn and start belong to the same bb, set prev to insn,
18448 so the call to increase_distance will increase the distance
18449 between insns by 1. */
18450 prev = insn;
18453 while (next
18454 && next != insn
18455 && distance < LEA_SEARCH_THRESHOLD)
18457 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18459 distance = increase_distance(prev, next, distance);
18460 if (insn_uses_reg_mem (regno, next))
18462 /* Return DISTANCE if OP0 is used in memory
18463 address in NEXT. */
18464 *found = true;
18465 return distance;
18468 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18470 /* Return -1 if OP0 is set in NEXT. */
18471 *redefined = true;
18472 return -1;
18475 prev = next;
18478 if (next == BB_END (bb))
18479 break;
18481 next = NEXT_INSN (next);
18484 return distance;
18487 /* Return the distance between INSN and the next insn that uses
18488 register number REGNO0 in a memory address. Return -1 if no such
18489 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
18491 static int
18492 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18494 basic_block bb = BLOCK_FOR_INSN (insn);
18495 int distance = 0;
18496 bool found = false;
18497 bool redefined = false;
18499 if (insn != BB_END (bb))
18500 distance = distance_agu_use_in_bb (regno0, insn, distance,
18501 NEXT_INSN (insn),
18502 &found, &redefined);
18504 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18506 edge e;
18507 edge_iterator ei;
18508 bool simple_loop = false;
18510 FOR_EACH_EDGE (e, ei, bb->succs)
18511 if (e->dest == bb)
18513 simple_loop = true;
18514 break;
18517 if (simple_loop)
18518 distance = distance_agu_use_in_bb (regno0, insn,
18519 distance, BB_HEAD (bb),
18520 &found, &redefined);
18521 else
18523 int shortest_dist = -1;
18524 bool found_in_bb = false;
18525 bool redefined_in_bb = false;
18527 FOR_EACH_EDGE (e, ei, bb->succs)
18529 int bb_dist
18530 = distance_agu_use_in_bb (regno0, insn,
18531 distance, BB_HEAD (e->dest),
18532 &found_in_bb, &redefined_in_bb);
18533 if (found_in_bb)
18535 if (shortest_dist < 0)
18536 shortest_dist = bb_dist;
18537 else if (bb_dist > 0)
18538 shortest_dist = MIN (bb_dist, shortest_dist);
18540 found = true;
18544 distance = shortest_dist;
18548 if (!found || redefined)
18549 return -1;
18551 return distance >> 1;
18554 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18555 there is a choice between LEA and ADD.
18556 Negative value: ADD is preferred over LEA
18557 Zero: Neutral
18558 Positive value: LEA is preferred over ADD */
18559 #define IX86_LEA_PRIORITY 0
18561 /* Return true if usage of lea INSN has performance advantage
18562 over a sequence of instructions. Instructions sequence has
18563 SPLIT_COST cycles higher latency than lea latency. */
18565 static bool
18566 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18567 unsigned int regno2, int split_cost, bool has_scale)
18569 int dist_define, dist_use;
18571 /* For Silvermont, the use of LEA is justified if it is a 2-source or
18572 3-source LEA used for a non-destructive destination, or if the
18573 ability to use a SCALE factor is wanted. */
18574 if (TARGET_SILVERMONT || TARGET_INTEL)
18576 if (has_scale)
18577 return true;
18578 if (split_cost < 1)
18579 return false;
18580 if (regno0 == regno1 || regno0 == regno2)
18581 return false;
18582 return true;
18585 dist_define = distance_non_agu_define (regno1, regno2, insn);
18586 dist_use = distance_agu_use (regno0, insn);
18588 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18590 /* If there is no non-AGU operand definition, no AGU
18591 operand usage, and the split cost is 0, then both the lea
18592 and non-lea variants have the same priority. Currently
18593 we prefer lea for 64-bit code and non-lea for 32-bit
18594 code. */
18595 if (dist_use < 0 && split_cost == 0)
18596 return TARGET_64BIT || IX86_LEA_PRIORITY;
18597 else
18598 return true;
18601 /* With a longer definition distance, lea is preferable.
18602 Here we adjust it to take into account the splitting cost and
18603 the lea priority. */
18604 dist_define += split_cost + IX86_LEA_PRIORITY;
18606 /* If there is no use in a memory address then we just check
18607 that the split cost exceeds the AGU stall. */
18608 if (dist_use < 0)
18609 return dist_define > LEA_MAX_STALL;
18611 /* If this insn has both a backward non-AGU dependence and a forward
18612 AGU dependence, the one with the shorter distance takes effect. */
18613 return dist_define >= dist_use;
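/* In short: a large DIST_DEFINE means the lea inputs were produced long
   ago, so the AGU does not have to wait on a fresh ALU result, while a
   small DIST_USE means the result is needed in an address soon.  The lea
   is kept once the definition distance, adjusted by SPLIT_COST and
   IX86_LEA_PRIORITY, reaches the use distance; otherwise the caller
   splits it into ALU instructions.  */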
18616 /* Return true if it is legal to clobber flags by INSN and
18617 false otherwise. */
18619 static bool
18620 ix86_ok_to_clobber_flags (rtx_insn *insn)
18622 basic_block bb = BLOCK_FOR_INSN (insn);
18623 df_ref use;
18624 bitmap live;
18626 while (insn)
18628 if (NONDEBUG_INSN_P (insn))
18630 FOR_EACH_INSN_USE (use, insn)
18631 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18632 return false;
18634 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18635 return true;
18638 if (insn == BB_END (bb))
18639 break;
18641 insn = NEXT_INSN (insn);
18644 live = df_get_live_out(bb);
18645 return !REGNO_REG_SET_P (live, FLAGS_REG);
18648 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18649 move and add to avoid AGU stalls. */
18651 bool
18652 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18654 unsigned int regno0, regno1, regno2;
18656 /* Check if we need to optimize. */
18657 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18658 return false;
18660 /* Check it is correct to split here. */
18661 if (!ix86_ok_to_clobber_flags(insn))
18662 return false;
18664 regno0 = true_regnum (operands[0]);
18665 regno1 = true_regnum (operands[1]);
18666 regno2 = true_regnum (operands[2]);
18668 /* We need to split only adds with a non-destructive
18669 destination operand. */
18670 if (regno0 == regno1 || regno0 == regno2)
18671 return false;
18672 else
18673 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
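/* For illustration (register names are arbitrary): when this returns true,
   the caller replaces
       lea    (%rbx,%rcx), %rax
   with the equivalent ALU sequence
       mov    %rbx, %rax
       add    %rcx, %rax
   which the SPLIT_COST of 1 passed above is meant to account for.  */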
18676 /* Return true if we should emit lea instruction instead of mov
18677 instruction. */
18679 bool
18680 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18682 unsigned int regno0, regno1;
18684 /* Check if we need to optimize. */
18685 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18686 return false;
18688 /* Use lea for reg to reg moves only. */
18689 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18690 return false;
18692 regno0 = true_regnum (operands[0]);
18693 regno1 = true_regnum (operands[1]);
18695 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18698 /* Return true if we need to split lea into a sequence of
18699 instructions to avoid AGU stalls. */
18701 bool
18702 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18704 unsigned int regno0, regno1, regno2;
18705 int split_cost;
18706 struct ix86_address parts;
18707 int ok;
18709 /* Check if we need to optimize. */
18710 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18711 return false;
18713 /* The "at least two components" test below might not catch simple
18714 move or zero extension insns if parts.base is non-NULL and parts.disp
18715 is const0_rtx as the only components in the address, e.g. if the
18716 register is %rbp or %r13. As this test is much cheaper and moves or
18717 zero extensions are the common case, do this check first. */
18718 if (REG_P (operands[1])
18719 || (SImode_address_operand (operands[1], VOIDmode)
18720 && REG_P (XEXP (operands[1], 0))))
18721 return false;
18723 /* Check if it is OK to split here. */
18724 if (!ix86_ok_to_clobber_flags (insn))
18725 return false;
18727 ok = ix86_decompose_address (operands[1], &parts);
18728 gcc_assert (ok);
18730 /* There should be at least two components in the address. */
18731 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18732 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18733 return false;
18735 /* We should not split into an add if a non-legitimate PIC
18736 operand is used as the displacement. */
18737 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18738 return false;
18740 regno0 = true_regnum (operands[0]);
18741 regno1 = INVALID_REGNUM;
18742 regno2 = INVALID_REGNUM;
18744 if (parts.base)
18745 regno1 = true_regnum (parts.base);
18746 if (parts.index)
18747 regno2 = true_regnum (parts.index);
18749 split_cost = 0;
18751 /* Compute how many cycles we will add to the execution time
18752 if we split the lea into a sequence of instructions. */
18753 if (parts.base || parts.index)
18755 /* Have to use a mov instruction if the non-destructive
18756 destination form is used. */
18757 if (regno1 != regno0 && regno2 != regno0)
18758 split_cost += 1;
18760 /* Have to add index to base if both exist. */
18761 if (parts.base && parts.index)
18762 split_cost += 1;
18764 /* Have to use shift and adds if scale is 2 or greater. */
18765 if (parts.scale > 1)
18767 if (regno0 != regno1)
18768 split_cost += 1;
18769 else if (regno2 == regno0)
18770 split_cost += 4;
18771 else
18772 split_cost += parts.scale;
18775 /* Have to use an add instruction with an immediate if
18776 disp is nonzero. */
18777 if (parts.disp && parts.disp != const0_rtx)
18778 split_cost += 1;
18780 /* Subtract the price of lea. */
18781 split_cost -= 1;
18784 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18785 parts.scale > 1);
18788 /* Emit x86 binary operator CODE in mode MODE, where the first operand
18789 matches the destination.  The emitted RTX includes a clobber of FLAGS_REG. */
18791 static void
18792 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18793 rtx dst, rtx src)
18795 rtx op, clob;
18797 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18798 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18800 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
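/* Example use: ix86_emit_binop (PLUS, SImode, dst, src) emits the PARALLEL
   for dst = dst + src together with the FLAGS_REG clobber, matching the
   standard two-operand x86 arithmetic patterns.  */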
18803 /* Return true if the def of REGNO1, searching backward from INSN within
 the basic block, is found before that of REGNO2 (i.e. REGNO1's def is
 nearest to the insn). */
18805 static bool
18806 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18808 rtx_insn *prev = insn;
18809 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18811 if (insn == start)
18812 return false;
18813 while (prev && prev != start)
18815 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18817 prev = PREV_INSN (prev);
18818 continue;
18820 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18821 return true;
18822 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18823 return false;
18824 prev = PREV_INSN (prev);
18827 /* None of the regs is defined in the bb. */
18828 return false;
18831 /* Split lea instructions into a sequence of instructions
18832 which are executed on ALU to avoid AGU stalls.
18833 It is assumed that it is allowed to clobber flags register
18834 at lea position. */
18836 void
18837 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18839 unsigned int regno0, regno1, regno2;
18840 struct ix86_address parts;
18841 rtx target, tmp;
18842 int ok, adds;
18844 ok = ix86_decompose_address (operands[1], &parts);
18845 gcc_assert (ok);
18847 target = gen_lowpart (mode, operands[0]);
18849 regno0 = true_regnum (target);
18850 regno1 = INVALID_REGNUM;
18851 regno2 = INVALID_REGNUM;
18853 if (parts.base)
18855 parts.base = gen_lowpart (mode, parts.base);
18856 regno1 = true_regnum (parts.base);
18859 if (parts.index)
18861 parts.index = gen_lowpart (mode, parts.index);
18862 regno2 = true_regnum (parts.index);
18865 if (parts.disp)
18866 parts.disp = gen_lowpart (mode, parts.disp);
18868 if (parts.scale > 1)
18870 /* Case r1 = r1 + ... */
18871 if (regno1 == regno0)
18873 /* If we have the case r1 = r1 + C * r2 then we
18874 would have to use multiplication, which is very
18875 expensive.  Assume the cost model is wrong if we
18876 ever get such a case here. */
18877 gcc_assert (regno2 != regno0);
18879 for (adds = parts.scale; adds > 0; adds--)
18880 ix86_emit_binop (PLUS, mode, target, parts.index);
18882 else
18884 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18885 if (regno0 != regno2)
18886 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18888 /* Use shift for scaling. */
18889 ix86_emit_binop (ASHIFT, mode, target,
18890 GEN_INT (exact_log2 (parts.scale)));
18892 if (parts.base)
18893 ix86_emit_binop (PLUS, mode, target, parts.base);
18895 if (parts.disp && parts.disp != const0_rtx)
18896 ix86_emit_binop (PLUS, mode, target, parts.disp);
18899 else if (!parts.base && !parts.index)
18901 gcc_assert(parts.disp);
18902 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18904 else
18906 if (!parts.base)
18908 if (regno0 != regno2)
18909 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18911 else if (!parts.index)
18913 if (regno0 != regno1)
18914 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18916 else
18918 if (regno0 == regno1)
18919 tmp = parts.index;
18920 else if (regno0 == regno2)
18921 tmp = parts.base;
18922 else
18924 rtx tmp1;
18926 /* Find better operand for SET instruction, depending
18927 on which definition is farther from the insn. */
18928 if (find_nearest_reg_def (insn, regno1, regno2))
18929 tmp = parts.index, tmp1 = parts.base;
18930 else
18931 tmp = parts.base, tmp1 = parts.index;
18933 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18935 if (parts.disp && parts.disp != const0_rtx)
18936 ix86_emit_binop (PLUS, mode, target, parts.disp);
18938 ix86_emit_binop (PLUS, mode, target, tmp1);
18939 return;
18942 ix86_emit_binop (PLUS, mode, target, tmp);
18945 if (parts.disp && parts.disp != const0_rtx)
18946 ix86_emit_binop (PLUS, mode, target, parts.disp);
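/* Worked example (illustrative operands): lea 0x8(%rbx,%rcx,4), %rax is
   split into
       mov    %rcx, %rax
       shl    $2, %rax
       add    %rbx, %rax
       add    $0x8, %rax
   following the scale/base/displacement handling above.  */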
18950 /* Return true if it is ok to optimize an ADD operation to an LEA
18951 operation to avoid flag register consumption.  For most processors,
18952 ADD is faster than LEA.  For processors like BONNELL, if the
18953 destination register of the LEA holds an actual address which will be
18954 used soon, LEA is better; otherwise ADD is better. */
18956 bool
18957 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
18959 unsigned int regno0 = true_regnum (operands[0]);
18960 unsigned int regno1 = true_regnum (operands[1]);
18961 unsigned int regno2 = true_regnum (operands[2]);
18963 /* If a = b + c with a != b and a != c, we must use the lea form. */
18964 if (regno0 != regno1 && regno0 != regno2)
18965 return true;
18967 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18968 return false;
18970 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18973 /* Return true if destination reg of SET_BODY is shift count of
18974 USE_BODY. */
18976 static bool
18977 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18979 rtx set_dest;
18980 rtx shift_rtx;
18981 int i;
18983 /* Retrieve destination of SET_BODY. */
18984 switch (GET_CODE (set_body))
18986 case SET:
18987 set_dest = SET_DEST (set_body);
18988 if (!set_dest || !REG_P (set_dest))
18989 return false;
18990 break;
18991 case PARALLEL:
18992 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18993 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18994 use_body))
18995 return true;
18996 default:
18997 return false;
18998 break;
19001 /* Retrieve shift count of USE_BODY. */
19002 switch (GET_CODE (use_body))
19004 case SET:
19005 shift_rtx = XEXP (use_body, 1);
19006 break;
19007 case PARALLEL:
19008 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
19009 if (ix86_dep_by_shift_count_body (set_body,
19010 XVECEXP (use_body, 0, i)))
19011 return true;
19012 default:
19013 return false;
19014 break;
19017 if (shift_rtx
19018 && (GET_CODE (shift_rtx) == ASHIFT
19019 || GET_CODE (shift_rtx) == LSHIFTRT
19020 || GET_CODE (shift_rtx) == ASHIFTRT
19021 || GET_CODE (shift_rtx) == ROTATE
19022 || GET_CODE (shift_rtx) == ROTATERT))
19024 rtx shift_count = XEXP (shift_rtx, 1);
19026 /* Return true if shift count is dest of SET_BODY. */
19027 if (REG_P (shift_count))
19029 /* Add this check since we can be invoked before register
19030 allocation by the pre-reload scheduler. */
19031 if (reload_completed
19032 && true_regnum (set_dest) == true_regnum (shift_count))
19033 return true;
19034 else if (REGNO(set_dest) == REGNO(shift_count))
19035 return true;
19039 return false;
19042 /* Return true if destination reg of SET_INSN is shift count of
19043 USE_INSN. */
19045 bool
19046 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19048 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19049 PATTERN (use_insn));
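/* For example (illustrative registers), if SET_INSN sets %ecx and USE_INSN
   is sall %cl, %eax, then the destination of the SET is the shift count of
   the USE and this returns true.  */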
19052 /* Return TRUE or FALSE depending on whether the unary operator meets the
19053 appropriate constraints. */
19055 bool
19056 ix86_unary_operator_ok (enum rtx_code,
19057 machine_mode,
19058 rtx operands[2])
19060 /* If one of operands is memory, source and destination must match. */
19061 if ((MEM_P (operands[0])
19062 || MEM_P (operands[1]))
19063 && ! rtx_equal_p (operands[0], operands[1]))
19064 return false;
19065 return true;
19068 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19069 are ok, keeping in mind the possible movddup alternative. */
19071 bool
19072 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19074 if (MEM_P (operands[0]))
19075 return rtx_equal_p (operands[0], operands[1 + high]);
19076 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19077 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19078 return true;
19081 /* Post-reload splitter for converting an SF or DFmode value in an
19082 SSE register into an unsigned SImode. */
19084 void
19085 ix86_split_convert_uns_si_sse (rtx operands[])
19087 machine_mode vecmode;
19088 rtx value, large, zero_or_two31, input, two31, x;
19090 large = operands[1];
19091 zero_or_two31 = operands[2];
19092 input = operands[3];
19093 two31 = operands[4];
19094 vecmode = GET_MODE (large);
19095 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19097 /* Load up the value into the low element. We must ensure that the other
19098 elements are valid floats -- zero is the easiest such value. */
19099 if (MEM_P (input))
19101 if (vecmode == V4SFmode)
19102 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19103 else
19104 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19106 else
19108 input = gen_rtx_REG (vecmode, REGNO (input));
19109 emit_move_insn (value, CONST0_RTX (vecmode));
19110 if (vecmode == V4SFmode)
19111 emit_insn (gen_sse_movss (value, value, input));
19112 else
19113 emit_insn (gen_sse2_movsd (value, value, input));
19116 emit_move_insn (large, two31);
19117 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19119 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19120 emit_insn (gen_rtx_SET (VOIDmode, large, x));
19122 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19123 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
19125 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19126 emit_insn (gen_rtx_SET (VOIDmode, value, x));
19128 large = gen_rtx_REG (V4SImode, REGNO (large));
19129 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19131 x = gen_rtx_REG (V4SImode, REGNO (value));
19132 if (vecmode == V4SFmode)
19133 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19134 else
19135 emit_insn (gen_sse2_cvttpd2dq (x, value));
19136 value = x;
19138 emit_insn (gen_xorv4si3 (value, value, large));
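/* The sequence above implements: if VALUE >= 0x1p31, subtract 0x1p31 before
   the signed conversion and xor 0x80000000 back into the integer result
   (the LE mask selects the 2**31 bias and, shifted left by 31, supplies the
   bit to xor); otherwise the plain signed conversion result is used.  */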
19141 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19142 Expects the 64-bit DImode to be supplied in a pair of integral
19143 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19144 -mfpmath=sse, !optimize_size only. */
19146 void
19147 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19149 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19150 rtx int_xmm, fp_xmm;
19151 rtx biases, exponents;
19152 rtx x;
19154 int_xmm = gen_reg_rtx (V4SImode);
19155 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19156 emit_insn (gen_movdi_to_sse (int_xmm, input));
19157 else if (TARGET_SSE_SPLIT_REGS)
19159 emit_clobber (int_xmm);
19160 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19162 else
19164 x = gen_reg_rtx (V2DImode);
19165 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19166 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19169 x = gen_rtx_CONST_VECTOR (V4SImode,
19170 gen_rtvec (4, GEN_INT (0x43300000UL),
19171 GEN_INT (0x45300000UL),
19172 const0_rtx, const0_rtx));
19173 exponents = validize_mem (force_const_mem (V4SImode, x));
19175 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19176 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19178 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19179 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19180 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19181 (0x1.0p84 + double(fp_value_hi_xmm)).
19182 Note these exponents differ by 32. */
19184 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19186 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19187 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19188 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19189 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19190 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19191 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19192 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19193 biases = validize_mem (force_const_mem (V2DFmode, biases));
19194 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19196 /* Add the upper and lower DFmode values together. */
19197 if (TARGET_SSE3)
19198 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19199 else
19201 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19202 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19203 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19206 ix86_expand_vector_extract (false, target, fp_xmm, 0);
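/* Worked example of the bias trick: for the input 2**32 + 3, the low half
   paired with exponent word 0x43300000 reads as 0x1.0p52 + 3 and the high
   half paired with 0x45300000 reads as 0x1.0p84 + 2**32; subtracting the
   two biases and summing the halves recovers 2**32 + 3 exactly.  */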
19209 /* Not used, but eases macroization of patterns. */
19210 void
19211 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19213 gcc_unreachable ();
19216 /* Convert an unsigned SImode value into a DFmode. Only currently used
19217 for SSE, but applicable anywhere. */
19219 void
19220 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19222 REAL_VALUE_TYPE TWO31r;
19223 rtx x, fp;
19225 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19226 NULL, 1, OPTAB_DIRECT);
19228 fp = gen_reg_rtx (DFmode);
19229 emit_insn (gen_floatsidf2 (fp, x));
19231 real_ldexp (&TWO31r, &dconst1, 31);
19232 x = const_double_from_real_value (TWO31r, DFmode);
19234 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19235 if (x != target)
19236 emit_move_insn (target, x);
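/* The trick: adding INT_MIN wraps the unsigned input into the signed range,
   the signed SImode->DFmode conversion is exact, and adding 0x1.0p31 back
   restores the original unsigned value.  E.g. input 0x80000005 -> x = 5 ->
   5.0 -> 5.0 + 2147483648.0 = 2147483653.0.  */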
19239 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19240 32-bit mode; otherwise we have a direct convert instruction. */
19242 void
19243 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19245 REAL_VALUE_TYPE TWO32r;
19246 rtx fp_lo, fp_hi, x;
19248 fp_lo = gen_reg_rtx (DFmode);
19249 fp_hi = gen_reg_rtx (DFmode);
19251 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19253 real_ldexp (&TWO32r, &dconst1, 32);
19254 x = const_double_from_real_value (TWO32r, DFmode);
19255 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19257 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19259 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19260 0, OPTAB_DIRECT);
19261 if (x != target)
19262 emit_move_insn (target, x);
19265 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19266 For x86_32, -mfpmath=sse, !optimize_size only. */
19267 void
19268 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19270 REAL_VALUE_TYPE ONE16r;
19271 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19273 real_ldexp (&ONE16r, &dconst1, 16);
19274 x = const_double_from_real_value (ONE16r, SFmode);
19275 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
19276 NULL, 0, OPTAB_DIRECT);
19277 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
19278 NULL, 0, OPTAB_DIRECT);
19279 fp_hi = gen_reg_rtx (SFmode);
19280 fp_lo = gen_reg_rtx (SFmode);
19281 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19282 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19283 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19284 0, OPTAB_DIRECT);
19285 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19286 0, OPTAB_DIRECT);
19287 if (!rtx_equal_p (target, fp_hi))
19288 emit_move_insn (target, fp_hi);
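/* The input is split into 16-bit halves so that each signed SImode->SFmode
   conversion is exact; the result is then reassembled as
   fp_hi * 0x1p16 + fp_lo.  */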
19291 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19292 a vector of unsigned ints VAL to vector of floats TARGET. */
19294 void
19295 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19297 rtx tmp[8];
19298 REAL_VALUE_TYPE TWO16r;
19299 machine_mode intmode = GET_MODE (val);
19300 machine_mode fltmode = GET_MODE (target);
19301 rtx (*cvt) (rtx, rtx);
19303 if (intmode == V4SImode)
19304 cvt = gen_floatv4siv4sf2;
19305 else
19306 cvt = gen_floatv8siv8sf2;
19307 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19308 tmp[0] = force_reg (intmode, tmp[0]);
19309 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19310 OPTAB_DIRECT);
19311 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19312 NULL_RTX, 1, OPTAB_DIRECT);
19313 tmp[3] = gen_reg_rtx (fltmode);
19314 emit_insn (cvt (tmp[3], tmp[1]));
19315 tmp[4] = gen_reg_rtx (fltmode);
19316 emit_insn (cvt (tmp[4], tmp[2]));
19317 real_ldexp (&TWO16r, &dconst1, 16);
19318 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19319 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19320 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19321 OPTAB_DIRECT);
19322 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19323 OPTAB_DIRECT);
19324 if (tmp[7] != target)
19325 emit_move_insn (target, tmp[7]);
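/* This is the same 16-bit split used in ix86_expand_convert_uns_sisf_sse,
   applied per vector element: tmp[3] and tmp[4] hold the converted low and
   high halves, recombined as tmp[4] * 0x1p16 + tmp[3].  */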
19328 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19329 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19330 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19331 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
19334 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19336 REAL_VALUE_TYPE TWO31r;
19337 rtx two31r, tmp[4];
19338 machine_mode mode = GET_MODE (val);
19339 machine_mode scalarmode = GET_MODE_INNER (mode);
19340 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19341 rtx (*cmp) (rtx, rtx, rtx, rtx);
19342 int i;
19344 for (i = 0; i < 3; i++)
19345 tmp[i] = gen_reg_rtx (mode);
19346 real_ldexp (&TWO31r, &dconst1, 31);
19347 two31r = const_double_from_real_value (TWO31r, scalarmode);
19348 two31r = ix86_build_const_vector (mode, 1, two31r);
19349 two31r = force_reg (mode, two31r);
19350 switch (mode)
19352 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19353 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19354 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19355 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19356 default: gcc_unreachable ();
19358 tmp[3] = gen_rtx_LE (mode, two31r, val);
19359 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19360 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19361 0, OPTAB_DIRECT);
19362 if (intmode == V4SImode || TARGET_AVX2)
19363 *xorp = expand_simple_binop (intmode, ASHIFT,
19364 gen_lowpart (intmode, tmp[0]),
19365 GEN_INT (31), NULL_RTX, 0,
19366 OPTAB_DIRECT);
19367 else
19369 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
19370 two31 = ix86_build_const_vector (intmode, 1, two31);
19371 *xorp = expand_simple_binop (intmode, AND,
19372 gen_lowpart (intmode, tmp[0]),
19373 two31, NULL_RTX, 0,
19374 OPTAB_DIRECT);
19376 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19377 0, OPTAB_DIRECT);
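/* The returned value has 0x1p31 subtracted in every lane where VAL >= 0x1p31,
   and *XORP holds an 0x80000000 mask for exactly those lanes, so the caller
   can perform a signed truncation and xor the sign bit back in.  */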
19380 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19381 then replicate the value for all elements of the vector
19382 register. */
19385 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19387 int i, n_elt;
19388 rtvec v;
19389 machine_mode scalar_mode;
19391 switch (mode)
19393 case V64QImode:
19394 case V32QImode:
19395 case V16QImode:
19396 case V32HImode:
19397 case V16HImode:
19398 case V8HImode:
19399 case V16SImode:
19400 case V8SImode:
19401 case V4SImode:
19402 case V8DImode:
19403 case V4DImode:
19404 case V2DImode:
19405 gcc_assert (vect);
19406 case V16SFmode:
19407 case V8SFmode:
19408 case V4SFmode:
19409 case V8DFmode:
19410 case V4DFmode:
19411 case V2DFmode:
19412 n_elt = GET_MODE_NUNITS (mode);
19413 v = rtvec_alloc (n_elt);
19414 scalar_mode = GET_MODE_INNER (mode);
19416 RTVEC_ELT (v, 0) = value;
19418 for (i = 1; i < n_elt; ++i)
19419 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19421 return gen_rtx_CONST_VECTOR (mode, v);
19423 default:
19424 gcc_unreachable ();
19428 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19429 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19430 for an SSE register. If VECT is true, then replicate the mask for
19431 all elements of the vector register. If INVERT is true, then create
19432 a mask excluding the sign bit. */
19435 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19437 machine_mode vec_mode, imode;
19438 wide_int w;
19439 rtx mask, v;
19441 switch (mode)
19443 case V16SImode:
19444 case V16SFmode:
19445 case V8SImode:
19446 case V4SImode:
19447 case V8SFmode:
19448 case V4SFmode:
19449 vec_mode = mode;
19450 mode = GET_MODE_INNER (mode);
19451 imode = SImode;
19452 break;
19454 case V8DImode:
19455 case V4DImode:
19456 case V2DImode:
19457 case V8DFmode:
19458 case V4DFmode:
19459 case V2DFmode:
19460 vec_mode = mode;
19461 mode = GET_MODE_INNER (mode);
19462 imode = DImode;
19463 break;
19465 case TImode:
19466 case TFmode:
19467 vec_mode = VOIDmode;
19468 imode = TImode;
19469 break;
19471 default:
19472 gcc_unreachable ();
19475 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (mode) - 1,
19476 GET_MODE_BITSIZE (mode));
19477 if (invert)
19478 w = wi::bit_not (w);
19480 /* Force this value into the low part of a fp vector constant. */
19481 mask = immed_wide_int_const (w, imode);
19482 mask = gen_lowpart (mode, mask);
19484 if (vec_mode == VOIDmode)
19485 return force_reg (mode, mask);
19487 v = ix86_build_const_vector (vec_mode, vect, mask);
19488 return force_reg (vec_mode, v);
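/* For example, for V4SFmode with VECT this yields a vector of four
   0x80000000 masks (or 0x7fffffff each when INVERT), ready for the bitwise
   and/andn/or/xor idioms used by the abs, neg and copysign expanders
   below.  */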
19491 /* Generate code for floating point ABS or NEG. */
19493 void
19494 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19495 rtx operands[])
19497 rtx mask, set, dst, src;
19498 bool use_sse = false;
19499 bool vector_mode = VECTOR_MODE_P (mode);
19500 machine_mode vmode = mode;
19502 if (vector_mode)
19503 use_sse = true;
19504 else if (mode == TFmode)
19505 use_sse = true;
19506 else if (TARGET_SSE_MATH)
19508 use_sse = SSE_FLOAT_MODE_P (mode);
19509 if (mode == SFmode)
19510 vmode = V4SFmode;
19511 else if (mode == DFmode)
19512 vmode = V2DFmode;
19515 /* NEG and ABS performed with SSE use bitwise mask operations.
19516 Create the appropriate mask now. */
19517 if (use_sse)
19518 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19519 else
19520 mask = NULL_RTX;
19522 dst = operands[0];
19523 src = operands[1];
19525 set = gen_rtx_fmt_e (code, mode, src);
19526 set = gen_rtx_SET (VOIDmode, dst, set);
19528 if (mask)
19530 rtx use, clob;
19531 rtvec par;
19533 use = gen_rtx_USE (VOIDmode, mask);
19534 if (vector_mode)
19535 par = gen_rtvec (2, set, use);
19536 else
19538 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19539 par = gen_rtvec (3, set, use, clob);
19541 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19543 else
19544 emit_insn (set);
19547 /* Expand a copysign operation. Special case operand 0 being a constant. */
19549 void
19550 ix86_expand_copysign (rtx operands[])
19552 machine_mode mode, vmode;
19553 rtx dest, op0, op1, mask, nmask;
19555 dest = operands[0];
19556 op0 = operands[1];
19557 op1 = operands[2];
19559 mode = GET_MODE (dest);
19561 if (mode == SFmode)
19562 vmode = V4SFmode;
19563 else if (mode == DFmode)
19564 vmode = V2DFmode;
19565 else
19566 vmode = mode;
19568 if (CONST_DOUBLE_P (op0))
19570 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19572 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19573 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19575 if (mode == SFmode || mode == DFmode)
19577 if (op0 == CONST0_RTX (mode))
19578 op0 = CONST0_RTX (vmode);
19579 else
19581 rtx v = ix86_build_const_vector (vmode, false, op0);
19583 op0 = force_reg (vmode, v);
19586 else if (op0 != CONST0_RTX (mode))
19587 op0 = force_reg (mode, op0);
19589 mask = ix86_build_signbit_mask (vmode, 0, 0);
19591 if (mode == SFmode)
19592 copysign_insn = gen_copysignsf3_const;
19593 else if (mode == DFmode)
19594 copysign_insn = gen_copysigndf3_const;
19595 else
19596 copysign_insn = gen_copysigntf3_const;
19598 emit_insn (copysign_insn (dest, op0, op1, mask));
19600 else
19602 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19604 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19605 mask = ix86_build_signbit_mask (vmode, 0, 0);
19607 if (mode == SFmode)
19608 copysign_insn = gen_copysignsf3_var;
19609 else if (mode == DFmode)
19610 copysign_insn = gen_copysigndf3_var;
19611 else
19612 copysign_insn = gen_copysigntf3_var;
19614 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19618 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19619 be a constant, and so has already been expanded into a vector constant. */
19621 void
19622 ix86_split_copysign_const (rtx operands[])
19624 machine_mode mode, vmode;
19625 rtx dest, op0, mask, x;
19627 dest = operands[0];
19628 op0 = operands[1];
19629 mask = operands[3];
19631 mode = GET_MODE (dest);
19632 vmode = GET_MODE (mask);
19634 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19635 x = gen_rtx_AND (vmode, dest, mask);
19636 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19638 if (op0 != CONST0_RTX (vmode))
19640 x = gen_rtx_IOR (vmode, dest, op0);
19641 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19645 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19646 so we have to do two masks. */
19648 void
19649 ix86_split_copysign_var (rtx operands[])
19651 machine_mode mode, vmode;
19652 rtx dest, scratch, op0, op1, mask, nmask, x;
19654 dest = operands[0];
19655 scratch = operands[1];
19656 op0 = operands[2];
19657 op1 = operands[3];
19658 nmask = operands[4];
19659 mask = operands[5];
19661 mode = GET_MODE (dest);
19662 vmode = GET_MODE (mask);
19664 if (rtx_equal_p (op0, op1))
19666 /* Shouldn't happen often (it's useless, obviously), but when it does
19667 we'd generate incorrect code if we continue below. */
19668 emit_move_insn (dest, op0);
19669 return;
19672 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19674 gcc_assert (REGNO (op1) == REGNO (scratch));
19676 x = gen_rtx_AND (vmode, scratch, mask);
19677 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19679 dest = mask;
19680 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19681 x = gen_rtx_NOT (vmode, dest);
19682 x = gen_rtx_AND (vmode, x, op0);
19683 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19685 else
19687 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19689 x = gen_rtx_AND (vmode, scratch, mask);
19691 else /* alternative 2,4 */
19693 gcc_assert (REGNO (mask) == REGNO (scratch));
19694 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19695 x = gen_rtx_AND (vmode, scratch, op1);
19697 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19699 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19701 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19702 x = gen_rtx_AND (vmode, dest, nmask);
19704 else /* alternative 3,4 */
19706 gcc_assert (REGNO (nmask) == REGNO (dest));
19707 dest = nmask;
19708 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19709 x = gen_rtx_AND (vmode, dest, op0);
19711 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19714 x = gen_rtx_IOR (vmode, dest, scratch);
19715 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
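/* Whichever alternative is chosen, the result computed above is logically
   (op0 & nmask) | (op1 & mask), i.e. the magnitude bits of op0 combined
   with the sign bit of op1; the branches only decide which register can
   serve as the destination or scratch for each AND.  */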
19718 /* Return TRUE or FALSE depending on whether the first SET in INSN
19719 has source and destination with matching CC modes, and that the
19720 CC mode is at least as constrained as REQ_MODE. */
19722 bool
19723 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19725 rtx set;
19726 machine_mode set_mode;
19728 set = PATTERN (insn);
19729 if (GET_CODE (set) == PARALLEL)
19730 set = XVECEXP (set, 0, 0);
19731 gcc_assert (GET_CODE (set) == SET);
19732 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19734 set_mode = GET_MODE (SET_DEST (set));
19735 switch (set_mode)
19737 case CCNOmode:
19738 if (req_mode != CCNOmode
19739 && (req_mode != CCmode
19740 || XEXP (SET_SRC (set), 1) != const0_rtx))
19741 return false;
19742 break;
19743 case CCmode:
19744 if (req_mode == CCGCmode)
19745 return false;
19746 /* FALLTHRU */
19747 case CCGCmode:
19748 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19749 return false;
19750 /* FALLTHRU */
19751 case CCGOCmode:
19752 if (req_mode == CCZmode)
19753 return false;
19754 /* FALLTHRU */
19755 case CCZmode:
19756 break;
19758 case CCAmode:
19759 case CCCmode:
19760 case CCOmode:
19761 case CCSmode:
19762 if (set_mode != req_mode)
19763 return false;
19764 break;
19766 default:
19767 gcc_unreachable ();
19770 return GET_MODE (SET_SRC (set)) == set_mode;
19773 /* Generate insn patterns to do an integer compare of OPERANDS. */
19775 static rtx
19776 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19778 machine_mode cmpmode;
19779 rtx tmp, flags;
19781 cmpmode = SELECT_CC_MODE (code, op0, op1);
19782 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19784 /* This is very simple, but making the interface the same as in the
19785 FP case makes the rest of the code easier. */
19786 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19787 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19789 /* Return the test that should be put into the flags user, i.e.
19790 the bcc, scc, or cmov instruction. */
19791 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19794 /* Figure out whether to use ordered or unordered fp comparisons.
19795 Return the appropriate mode to use. */
19797 machine_mode
19798 ix86_fp_compare_mode (enum rtx_code)
19800 /* ??? In order to make all comparisons reversible, we do all comparisons
19801 non-trapping when compiling for IEEE.  Once gcc is able to distinguish
19802 all forms of trapping and non-trapping comparisons, we can make inequality
19803 comparisons trapping again, since it results in better code when using
19804 FCOM based compares. */
19805 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19808 machine_mode
19809 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19811 machine_mode mode = GET_MODE (op0);
19813 if (SCALAR_FLOAT_MODE_P (mode))
19815 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19816 return ix86_fp_compare_mode (code);
19819 switch (code)
19821 /* Only zero flag is needed. */
19822 case EQ: /* ZF=0 */
19823 case NE: /* ZF!=0 */
19824 return CCZmode;
19825 /* Codes needing carry flag. */
19826 case GEU: /* CF=0 */
19827 case LTU: /* CF=1 */
19828 /* Detect overflow checks. They need just the carry flag. */
19829 if (GET_CODE (op0) == PLUS
19830 && rtx_equal_p (op1, XEXP (op0, 0)))
19831 return CCCmode;
19832 else
19833 return CCmode;
19834 case GTU: /* CF=0 & ZF=0 */
19835 case LEU: /* CF=1 | ZF=1 */
19836 return CCmode;
19837 /* Codes possibly doable only with sign flag when
19838 comparing against zero. */
19839 case GE: /* SF=OF or SF=0 */
19840 case LT: /* SF<>OF or SF=1 */
19841 if (op1 == const0_rtx)
19842 return CCGOCmode;
19843 else
19844 /* For other cases Carry flag is not required. */
19845 return CCGCmode;
19846 /* Codes doable only with the sign flag when comparing
19847 against zero, but for which we miss a jump instruction,
19848 so we need to use relational tests against the overflow
19849 flag, which thus needs to be zero. */
19850 case GT: /* ZF=0 & SF=OF */
19851 case LE: /* ZF=1 | SF<>OF */
19852 if (op1 == const0_rtx)
19853 return CCNOmode;
19854 else
19855 return CCGCmode;
19856 /* The strcmp pattern does (use flags), and combine may ask us for a
19857 proper mode. */
19858 case USE:
19859 return CCmode;
19860 default:
19861 gcc_unreachable ();
19865 /* Return the fixed registers used for condition codes. */
19867 static bool
19868 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19870 *p1 = FLAGS_REG;
19871 *p2 = FPSR_REG;
19872 return true;
19875 /* If two condition code modes are compatible, return a condition code
19876 mode which is compatible with both. Otherwise, return
19877 VOIDmode. */
19879 static machine_mode
19880 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
19882 if (m1 == m2)
19883 return m1;
19885 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19886 return VOIDmode;
19888 if ((m1 == CCGCmode && m2 == CCGOCmode)
19889 || (m1 == CCGOCmode && m2 == CCGCmode))
19890 return CCGCmode;
19892 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19893 return m2;
19894 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19895 return m1;
19897 switch (m1)
19899 default:
19900 gcc_unreachable ();
19902 case CCmode:
19903 case CCGCmode:
19904 case CCGOCmode:
19905 case CCNOmode:
19906 case CCAmode:
19907 case CCCmode:
19908 case CCOmode:
19909 case CCSmode:
19910 case CCZmode:
19911 switch (m2)
19913 default:
19914 return VOIDmode;
19916 case CCmode:
19917 case CCGCmode:
19918 case CCGOCmode:
19919 case CCNOmode:
19920 case CCAmode:
19921 case CCCmode:
19922 case CCOmode:
19923 case CCSmode:
19924 case CCZmode:
19925 return CCmode;
19928 case CCFPmode:
19929 case CCFPUmode:
19930 /* These are only compatible with themselves, which we already
19931 checked above. */
19932 return VOIDmode;
19937 /* Return a comparison we can do that is equivalent to
19938 swap_condition (code), apart possibly from orderedness.
19939 But never change orderedness if TARGET_IEEE_FP, returning
19940 UNKNOWN in that case if necessary. */
19942 static enum rtx_code
19943 ix86_fp_swap_condition (enum rtx_code code)
19945 switch (code)
19947 case GT: /* GTU - CF=0 & ZF=0 */
19948 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19949 case GE: /* GEU - CF=0 */
19950 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19951 case UNLT: /* LTU - CF=1 */
19952 return TARGET_IEEE_FP ? UNKNOWN : GT;
19953 case UNLE: /* LEU - CF=1 | ZF=1 */
19954 return TARGET_IEEE_FP ? UNKNOWN : GE;
19955 default:
19956 return swap_condition (code);
19960 /* Return the cost of comparison CODE using the best strategy for performance.
19961 All the following functions use the number of instructions as a cost metric.
19962 In the future this should be tweaked to compute bytes for optimize_size and
19963 to take into account the performance of various instructions on various CPUs. */
19965 static int
19966 ix86_fp_comparison_cost (enum rtx_code code)
19968 int arith_cost;
19970 /* The cost of code using bit-twiddling on %ah. */
19971 switch (code)
19973 case UNLE:
19974 case UNLT:
19975 case LTGT:
19976 case GT:
19977 case GE:
19978 case UNORDERED:
19979 case ORDERED:
19980 case UNEQ:
19981 arith_cost = 4;
19982 break;
19983 case LT:
19984 case NE:
19985 case EQ:
19986 case UNGE:
19987 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19988 break;
19989 case LE:
19990 case UNGT:
19991 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19992 break;
19993 default:
19994 gcc_unreachable ();
19997 switch (ix86_fp_comparison_strategy (code))
19999 case IX86_FPCMP_COMI:
20000 return arith_cost > 4 ? 3 : 2;
20001 case IX86_FPCMP_SAHF:
20002 return arith_cost > 4 ? 4 : 3;
20003 default:
20004 return arith_cost;
20008 /* Return the strategy to use for floating-point comparisons.  We assume that
20009 fcomi is always preferable where available, since that is also true when
20010 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
20012 enum ix86_fpcmp_strategy
20013 ix86_fp_comparison_strategy (enum rtx_code)
20015 /* Do fcomi/sahf based test when profitable. */
20017 if (TARGET_CMOVE)
20018 return IX86_FPCMP_COMI;
20020 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20021 return IX86_FPCMP_SAHF;
20023 return IX86_FPCMP_ARITH;
20026 /* Swap, force into registers, or otherwise massage the two operands
20027 to a fp comparison. The operands are updated in place; the new
20028 comparison code is returned. */
20030 static enum rtx_code
20031 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20033 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20034 rtx op0 = *pop0, op1 = *pop1;
20035 machine_mode op_mode = GET_MODE (op0);
20036 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20038 /* All of the unordered compare instructions only work on registers.
20039 The same is true of the fcomi compare instructions. The XFmode
20040 compare instructions require registers except when comparing
20041 against zero or when converting operand 1 from fixed point to
20042 floating point. */
20044 if (!is_sse
20045 && (fpcmp_mode == CCFPUmode
20046 || (op_mode == XFmode
20047 && ! (standard_80387_constant_p (op0) == 1
20048 || standard_80387_constant_p (op1) == 1)
20049 && GET_CODE (op1) != FLOAT)
20050 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20052 op0 = force_reg (op_mode, op0);
20053 op1 = force_reg (op_mode, op1);
20055 else
20057 /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
20058 things around if that appears profitable, otherwise force op0
20059 into a register. */
20061 if (standard_80387_constant_p (op0) == 0
20062 || (MEM_P (op0)
20063 && ! (standard_80387_constant_p (op1) == 0
20064 || MEM_P (op1))))
20066 enum rtx_code new_code = ix86_fp_swap_condition (code);
20067 if (new_code != UNKNOWN)
20069 std::swap (op0, op1);
20070 code = new_code;
20074 if (!REG_P (op0))
20075 op0 = force_reg (op_mode, op0);
20077 if (CONSTANT_P (op1))
20079 int tmp = standard_80387_constant_p (op1);
20080 if (tmp == 0)
20081 op1 = validize_mem (force_const_mem (op_mode, op1));
20082 else if (tmp == 1)
20084 if (TARGET_CMOVE)
20085 op1 = force_reg (op_mode, op1);
20087 else
20088 op1 = force_reg (op_mode, op1);
20092 /* Try to rearrange the comparison to make it cheaper. */
20093 if (ix86_fp_comparison_cost (code)
20094 > ix86_fp_comparison_cost (swap_condition (code))
20095 && (REG_P (op1) || can_create_pseudo_p ()))
20097 std::swap (op0, op1);
20098 code = swap_condition (code);
20099 if (!REG_P (op0))
20100 op0 = force_reg (op_mode, op0);
20103 *pop0 = op0;
20104 *pop1 = op1;
20105 return code;
20108 /* Convert comparison codes we use to represent FP comparison to integer
20109 code that will result in proper branch. Return UNKNOWN if no such code
20110 is available. */
20112 enum rtx_code
20113 ix86_fp_compare_code_to_integer (enum rtx_code code)
20115 switch (code)
20117 case GT:
20118 return GTU;
20119 case GE:
20120 return GEU;
20121 case ORDERED:
20122 case UNORDERED:
20123 return code;
20124 break;
20125 case UNEQ:
20126 return EQ;
20127 break;
20128 case UNLT:
20129 return LTU;
20130 break;
20131 case UNLE:
20132 return LEU;
20133 break;
20134 case LTGT:
20135 return NE;
20136 break;
20137 default:
20138 return UNKNOWN;
20142 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20144 static rtx
20145 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20147 machine_mode fpcmp_mode, intcmp_mode;
20148 rtx tmp, tmp2;
20150 fpcmp_mode = ix86_fp_compare_mode (code);
20151 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20153 /* Do fcomi/sahf based test when profitable. */
20154 switch (ix86_fp_comparison_strategy (code))
20156 case IX86_FPCMP_COMI:
20157 intcmp_mode = fpcmp_mode;
20158 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20159 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20160 tmp);
20161 emit_insn (tmp);
20162 break;
20164 case IX86_FPCMP_SAHF:
20165 intcmp_mode = fpcmp_mode;
20166 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20167 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20168 tmp);
20170 if (!scratch)
20171 scratch = gen_reg_rtx (HImode);
20172 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20173 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20174 break;
20176 case IX86_FPCMP_ARITH:
20177 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20178 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20179 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20180 if (!scratch)
20181 scratch = gen_reg_rtx (HImode);
20182 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
20184 /* In the unordered case, we have to check C2 for NaN's, which
20185 doesn't happen to work out to anything nice combination-wise.
20186 So do some bit twiddling on the value we've got in AH to come
20187 up with an appropriate set of condition codes. */
20189 intcmp_mode = CCNOmode;
20190 switch (code)
20192 case GT:
20193 case UNGT:
20194 if (code == GT || !TARGET_IEEE_FP)
20196 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20197 code = EQ;
20199 else
20201 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20202 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20203 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20204 intcmp_mode = CCmode;
20205 code = GEU;
20207 break;
20208 case LT:
20209 case UNLT:
20210 if (code == LT && TARGET_IEEE_FP)
20212 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20213 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20214 intcmp_mode = CCmode;
20215 code = EQ;
20217 else
20219 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20220 code = NE;
20222 break;
20223 case GE:
20224 case UNGE:
20225 if (code == GE || !TARGET_IEEE_FP)
20227 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20228 code = EQ;
20230 else
20232 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20233 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20234 code = NE;
20236 break;
20237 case LE:
20238 case UNLE:
20239 if (code == LE && TARGET_IEEE_FP)
20241 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20242 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20243 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20244 intcmp_mode = CCmode;
20245 code = LTU;
20247 else
20249 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20250 code = NE;
20252 break;
20253 case EQ:
20254 case UNEQ:
20255 if (code == EQ && TARGET_IEEE_FP)
20257 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20258 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20259 intcmp_mode = CCmode;
20260 code = EQ;
20262 else
20264 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20265 code = NE;
20267 break;
20268 case NE:
20269 case LTGT:
20270 if (code == NE && TARGET_IEEE_FP)
20272 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20273 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20274 GEN_INT (0x40)));
20275 code = NE;
20277 else
20279 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20280 code = EQ;
20282 break;
20284 case UNORDERED:
20285 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20286 code = NE;
20287 break;
20288 case ORDERED:
20289 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20290 code = EQ;
20291 break;
20293 default:
20294 gcc_unreachable ();
20296 break;
20298 default:
20299 gcc_unreachable();
20302 /* Return the test that should be put into the flags user, i.e.
20303 the bcc, scc, or cmov instruction. */
20304 return gen_rtx_fmt_ee (code, VOIDmode,
20305 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20306 const0_rtx);
20309 static rtx
20310 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20312 rtx ret;
20314 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20315 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20317 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20319 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20320 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20322 else
20323 ret = ix86_expand_int_compare (code, op0, op1);
20325 return ret;
20328 void
20329 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20331 machine_mode mode = GET_MODE (op0);
20332 rtx tmp;
20334 switch (mode)
20336 case SFmode:
20337 case DFmode:
20338 case XFmode:
20339 case QImode:
20340 case HImode:
20341 case SImode:
20342 simple:
20343 tmp = ix86_expand_compare (code, op0, op1);
20344 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20345 gen_rtx_LABEL_REF (VOIDmode, label),
20346 pc_rtx);
20347 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20348 return;
20350 case DImode:
20351 if (TARGET_64BIT)
20352 goto simple;
20353 case TImode:
20354 /* Expand DImode branch into multiple compare+branch. */
20356 rtx lo[2], hi[2];
20357 rtx_code_label *label2;
20358 enum rtx_code code1, code2, code3;
20359 machine_mode submode;
20361 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20363 std::swap (op0, op1);
20364 code = swap_condition (code);
20367 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20368 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20370 submode = mode == DImode ? SImode : DImode;
20372 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20373 avoid two branches. This costs one extra insn, so disable when
20374 optimizing for size. */
20376 if ((code == EQ || code == NE)
20377 && (!optimize_insn_for_size_p ()
20378 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20380 rtx xor0, xor1;
20382 xor1 = hi[0];
20383 if (hi[1] != const0_rtx)
20384 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20385 NULL_RTX, 0, OPTAB_WIDEN);
20387 xor0 = lo[0];
20388 if (lo[1] != const0_rtx)
20389 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20390 NULL_RTX, 0, OPTAB_WIDEN);
20392 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20393 NULL_RTX, 0, OPTAB_WIDEN);
20395 ix86_expand_branch (code, tmp, const0_rtx, label);
20396 return;
20399 /* Otherwise, if we are doing a less-than or greater-than-or-equal
20400 comparison, op1 is a constant and the low word is zero, then we can
20401 just examine the high word.  Similarly for a low word of -1 and
20402 less-than-or-equal or greater-than. */
20404 if (CONST_INT_P (hi[1]))
20405 switch (code)
20407 case LT: case LTU: case GE: case GEU:
20408 if (lo[1] == const0_rtx)
20410 ix86_expand_branch (code, hi[0], hi[1], label);
20411 return;
20413 break;
20414 case LE: case LEU: case GT: case GTU:
20415 if (lo[1] == constm1_rtx)
20417 ix86_expand_branch (code, hi[0], hi[1], label);
20418 return;
20420 break;
20421 default:
20422 break;
20425 /* Otherwise, we need two or three jumps. */
20427 label2 = gen_label_rtx ();
20429 code1 = code;
20430 code2 = swap_condition (code);
20431 code3 = unsigned_condition (code);
20433 switch (code)
20435 case LT: case GT: case LTU: case GTU:
20436 break;
20438 case LE: code1 = LT; code2 = GT; break;
20439 case GE: code1 = GT; code2 = LT; break;
20440 case LEU: code1 = LTU; code2 = GTU; break;
20441 case GEU: code1 = GTU; code2 = LTU; break;
20443 case EQ: code1 = UNKNOWN; code2 = NE; break;
20444 case NE: code2 = UNKNOWN; break;
20446 default:
20447 gcc_unreachable ();
20451 * a < b =>
20452 * if (hi(a) < hi(b)) goto true;
20453 * if (hi(a) > hi(b)) goto false;
20454 * if (lo(a) < lo(b)) goto true;
20455 * false:
20458 if (code1 != UNKNOWN)
20459 ix86_expand_branch (code1, hi[0], hi[1], label);
20460 if (code2 != UNKNOWN)
20461 ix86_expand_branch (code2, hi[0], hi[1], label2);
20463 ix86_expand_branch (code3, lo[0], lo[1], label);
20465 if (code2 != UNKNOWN)
20466 emit_label (label2);
20467 return;
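/* Concrete instance of the scheme above for a signed a <= b: code1 = LT,
   code2 = GT, code3 = LEU, so we branch to LABEL if hi(a) < hi(b), branch
   past the rest (to LABEL2) if hi(a) > hi(b), and otherwise branch to
   LABEL if lo(a) <= lo(b) unsigned.  */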
20470 default:
20471 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20472 goto simple;
20476 /* Split branch based on floating point condition. */
20477 void
20478 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20479 rtx target1, rtx target2, rtx tmp)
20481 rtx condition;
20482 rtx i;
20484 if (target2 != pc_rtx)
20486 std::swap (target1, target2);
20487 code = reverse_condition_maybe_unordered (code);
20490 condition = ix86_expand_fp_compare (code, op1, op2,
20491 tmp);
20493 i = emit_jump_insn (gen_rtx_SET
20494 (VOIDmode, pc_rtx,
20495 gen_rtx_IF_THEN_ELSE (VOIDmode,
20496 condition, target1, target2)));
20497 if (split_branch_probability >= 0)
20498 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20501 void
20502 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20504 rtx ret;
20506 gcc_assert (GET_MODE (dest) == QImode);
20508 ret = ix86_expand_compare (code, op0, op1);
20509 PUT_MODE (ret, QImode);
20510 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20513 /* Expand comparison setting or clearing carry flag. Return true when
20514 successful and set pop for the operation. */
20515 static bool
20516 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20518 machine_mode mode =
20519 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20521 /* Do not handle double-mode compares, which go through the special path. */
20522 if (mode == (TARGET_64BIT ? TImode : DImode))
20523 return false;
20525 if (SCALAR_FLOAT_MODE_P (mode))
20527 rtx compare_op;
20528 rtx_insn *compare_seq;
20530 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20532 /* Shortcut: the following common codes never translate
20533 into carry flag compares. */
20534 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20535 || code == ORDERED || code == UNORDERED)
20536 return false;
20538 /* These comparisons require the zero flag; swap operands so they won't. */
20539 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20540 && !TARGET_IEEE_FP)
20542 std::swap (op0, op1);
20543 code = swap_condition (code);
20546 /* Try to expand the comparison and verify that we end up with
20547 a carry-flag-based comparison.  This fails to be true only when
20548 we decide to expand the comparison using arithmetic, which is not
20549 a common scenario. */
20550 start_sequence ();
20551 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20552 compare_seq = get_insns ();
20553 end_sequence ();
20555 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20556 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20557 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20558 else
20559 code = GET_CODE (compare_op);
20561 if (code != LTU && code != GEU)
20562 return false;
20564 emit_insn (compare_seq);
20565 *pop = compare_op;
20566 return true;
20569 if (!INTEGRAL_MODE_P (mode))
20570 return false;
20572 switch (code)
20574 case LTU:
20575 case GEU:
20576 break;
20578 /* Convert a==0 into (unsigned)a<1. */
20579 case EQ:
20580 case NE:
20581 if (op1 != const0_rtx)
20582 return false;
20583 op1 = const1_rtx;
20584 code = (code == EQ ? LTU : GEU);
20585 break;
20587 /* Convert a>b into b<a or a>=b+1. */
20588 case GTU:
20589 case LEU:
20590 if (CONST_INT_P (op1))
20592 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20593 /* Bail out on overflow.  We could still swap the operands, but that
20594 would force loading of the constant into a register. */
20595 if (op1 == const0_rtx
20596 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20597 return false;
20598 code = (code == GTU ? GEU : LTU);
20600 else
20602 std::swap (op0, op1);
20603 code = (code == GTU ? LTU : GEU);
20605 break;
20607 /* Convert a>=0 into (unsigned)a<0x80000000. */
20608 case LT:
20609 case GE:
20610 if (mode == DImode || op1 != const0_rtx)
20611 return false;
20612 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20613 code = (code == LT ? GEU : LTU);
20614 break;
20615 case LE:
20616 case GT:
20617 if (mode == DImode || op1 != constm1_rtx)
20618 return false;
20619 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20620 code = (code == LE ? GEU : LTU);
20621 break;
20623 default:
20624 return false;
20626 /* Swapping operands may cause the constant to appear as the first operand. */
20627 if (!nonimmediate_operand (op0, VOIDmode))
20629 if (!can_create_pseudo_p ())
20630 return false;
20631 op0 = force_reg (mode, op0);
20633 *pop = ix86_expand_compare (code, op0, op1);
20634 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20635 return true;
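/* Example of the GTU/LEU rewrite above: (gtu a 41) with a constant operand
   becomes (geu a 42), which expands to cmp $42, a and leaves the answer in
   the carry flag (a >= 42 exactly when the subtraction does not borrow).  */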
20638 bool
20639 ix86_expand_int_movcc (rtx operands[])
20641 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20642 rtx_insn *compare_seq;
20643 rtx compare_op;
20644 machine_mode mode = GET_MODE (operands[0]);
20645 bool sign_bit_compare_p = false;
20646 rtx op0 = XEXP (operands[1], 0);
20647 rtx op1 = XEXP (operands[1], 1);
20649 if (GET_MODE (op0) == TImode
20650 || (GET_MODE (op0) == DImode
20651 && !TARGET_64BIT))
20652 return false;
20654 start_sequence ();
20655 compare_op = ix86_expand_compare (code, op0, op1);
20656 compare_seq = get_insns ();
20657 end_sequence ();
20659 compare_code = GET_CODE (compare_op);
20661 if ((op1 == const0_rtx && (code == GE || code == LT))
20662 || (op1 == constm1_rtx && (code == GT || code == LE)))
20663 sign_bit_compare_p = true;
20665 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20666 HImode insns, we'd be swallowed in word prefix ops. */
20668 if ((mode != HImode || TARGET_FAST_PREFIX)
20669 && (mode != (TARGET_64BIT ? TImode : DImode))
20670 && CONST_INT_P (operands[2])
20671 && CONST_INT_P (operands[3]))
20673 rtx out = operands[0];
20674 HOST_WIDE_INT ct = INTVAL (operands[2]);
20675 HOST_WIDE_INT cf = INTVAL (operands[3]);
20676 HOST_WIDE_INT diff;
20678 diff = ct - cf;
20679 /* Sign bit compares are better done using shifts than by using
20680 sbb. */
20681 if (sign_bit_compare_p
20682 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20684 /* Detect overlap between destination and compare sources. */
20685 rtx tmp = out;
20687 if (!sign_bit_compare_p)
20689 rtx flags;
20690 bool fpcmp = false;
20692 compare_code = GET_CODE (compare_op);
20694 flags = XEXP (compare_op, 0);
20696 if (GET_MODE (flags) == CCFPmode
20697 || GET_MODE (flags) == CCFPUmode)
20699 fpcmp = true;
20700 compare_code
20701 = ix86_fp_compare_code_to_integer (compare_code);
20704 /* To simplify rest of code, restrict to the GEU case. */
20705 if (compare_code == LTU)
20707 std::swap (ct, cf);
20708 compare_code = reverse_condition (compare_code);
20709 code = reverse_condition (code);
20711 else
20713 if (fpcmp)
20714 PUT_CODE (compare_op,
20715 reverse_condition_maybe_unordered
20716 (GET_CODE (compare_op)));
20717 else
20718 PUT_CODE (compare_op,
20719 reverse_condition (GET_CODE (compare_op)));
20721 diff = ct - cf;
20723 if (reg_overlap_mentioned_p (out, op0)
20724 || reg_overlap_mentioned_p (out, op1))
20725 tmp = gen_reg_rtx (mode);
20727 if (mode == DImode)
20728 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20729 else
20730 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20731 flags, compare_op));
20733 else
20735 if (code == GT || code == GE)
20736 code = reverse_condition (code);
20737 else
20739 std::swap (ct, cf);
20740 diff = ct - cf;
20742 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20745 if (diff == 1)
20748 * cmpl op0,op1
20749 * sbbl dest,dest
20750 * [addl dest, ct]
20752 * Size 5 - 8.
20754 if (ct)
20755 tmp = expand_simple_binop (mode, PLUS,
20756 tmp, GEN_INT (ct),
20757 copy_rtx (tmp), 1, OPTAB_DIRECT);
20759 else if (cf == -1)
20762 * cmpl op0,op1
20763 * sbbl dest,dest
20764 * orl $ct, dest
20766 * Size 8.
20768 tmp = expand_simple_binop (mode, IOR,
20769 tmp, GEN_INT (ct),
20770 copy_rtx (tmp), 1, OPTAB_DIRECT);
20772 else if (diff == -1 && ct)
20775 * cmpl op0,op1
20776 * sbbl dest,dest
20777 * notl dest
20778 * [addl dest, cf]
20780 * Size 8 - 11.
20782 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20783 if (cf)
20784 tmp = expand_simple_binop (mode, PLUS,
20785 copy_rtx (tmp), GEN_INT (cf),
20786 copy_rtx (tmp), 1, OPTAB_DIRECT);
20788 else
20791 * cmpl op0,op1
20792 * sbbl dest,dest
20793 * [notl dest]
20794 * andl cf - ct, dest
20795 * [addl dest, ct]
20797 * Size 8 - 11.
20800 if (cf == 0)
20802 cf = ct;
20803 ct = 0;
20804 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20807 tmp = expand_simple_binop (mode, AND,
20808 copy_rtx (tmp),
20809 gen_int_mode (cf - ct, mode),
20810 copy_rtx (tmp), 1, OPTAB_DIRECT);
20811 if (ct)
20812 tmp = expand_simple_binop (mode, PLUS,
20813 copy_rtx (tmp), GEN_INT (ct),
20814 copy_rtx (tmp), 1, OPTAB_DIRECT);
20817 if (!rtx_equal_p (tmp, out))
20818 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20820 return true;
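/* A minimal sketch (not from the original source) of the sbb idiom the
   branch above relies on. For unsigned A and B and constants CT and CF:

     mask = (A < B) ? -1 : 0;          (cmp ; sbb dest,dest)
     dest = (mask & (CT - CF)) + CF;   (and ; add)

   This yields CT when A < B and CF otherwise, without a branch. The code
   above swaps ct and cf as needed so the pattern applies, and the
   diff == 1, cf == -1 and diff == -1 special cases replace the and/add
   pair with a single add, an or, or a not/add. */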
20823 if (diff < 0)
20825 machine_mode cmp_mode = GET_MODE (op0);
20826 enum rtx_code new_code;
20828 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20830 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20832 /* We may be reversing unordered compare to normal compare, that
20833 is not valid in general (we may convert non-trapping condition
20834 to trapping one), however on i386 we currently emit all
20835 comparisons unordered. */
20836 new_code = reverse_condition_maybe_unordered (code);
20838 else
20839 new_code = ix86_reverse_condition (code, cmp_mode);
20840 if (new_code != UNKNOWN)
20842 std::swap (ct, cf);
20843 diff = -diff;
20844 code = new_code;
20848 compare_code = UNKNOWN;
20849 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20850 && CONST_INT_P (op1))
20852 if (op1 == const0_rtx
20853 && (code == LT || code == GE))
20854 compare_code = code;
20855 else if (op1 == constm1_rtx)
20857 if (code == LE)
20858 compare_code = LT;
20859 else if (code == GT)
20860 compare_code = GE;
20864 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20865 if (compare_code != UNKNOWN
20866 && GET_MODE (op0) == GET_MODE (out)
20867 && (cf == -1 || ct == -1))
20869 /* If lea code below could be used, only optimize
20870 if it results in a 2 insn sequence. */
20872 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20873 || diff == 3 || diff == 5 || diff == 9)
20874 || (compare_code == LT && ct == -1)
20875 || (compare_code == GE && cf == -1))
20878 * notl op1 (if necessary)
20879 * sarl $31, op1
20880 * orl cf, op1
20882 if (ct != -1)
20884 cf = ct;
20885 ct = -1;
20886 code = reverse_condition (code);
20889 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20891 out = expand_simple_binop (mode, IOR,
20892 out, GEN_INT (cf),
20893 out, 1, OPTAB_DIRECT);
20894 if (out != operands[0])
20895 emit_move_insn (operands[0], out);
20897 return true;
20902 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20903 || diff == 3 || diff == 5 || diff == 9)
20904 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20905 && (mode != DImode
20906 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20909 * xorl dest,dest
20910 * cmpl op1,op2
20911 * setcc dest
20912 * lea cf(dest*(ct-cf)),dest
20914 * Size 14.
20916 * This also catches the degenerate setcc-only case.
20919 rtx tmp;
20920 int nops;
20922 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20924 nops = 0;
20925 /* On x86_64 the lea instruction operates on Pmode, so we need
20926 to get the arithmetic done in the proper mode to match. */
20927 if (diff == 1)
20928 tmp = copy_rtx (out);
20929 else
20931 rtx out1;
20932 out1 = copy_rtx (out);
20933 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20934 nops++;
20935 if (diff & 1)
20937 tmp = gen_rtx_PLUS (mode, tmp, out1);
20938 nops++;
20941 if (cf != 0)
20943 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20944 nops++;
20946 if (!rtx_equal_p (tmp, out))
20948 if (nops == 1)
20949 out = force_operand (tmp, copy_rtx (out));
20950 else
20951 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20953 if (!rtx_equal_p (out, operands[0]))
20954 emit_move_insn (operands[0], copy_rtx (out));
20956 return true;
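/* Worked example (not part of the original source) for the setcc + lea
   path above. With ct = 5 and cf = 2, so diff = 3:

     xor dest,dest
     cmp op1,op2
     setcc dest                    dest is 0 or 1
     lea 2(dest,dest,2), dest      dest = dest*3 + 2, i.e. 2 or 5

   A single lea covers diff values of 1, 2, 3, 4, 5, 8 and 9 together with
   the displacement cf, which is why only those diffs are accepted. */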
20960 * General case: Jumpful:
20961 * xorl dest,dest cmpl op1, op2
20962 * cmpl op1, op2 movl ct, dest
20963 * setcc dest jcc 1f
20964 * decl dest movl cf, dest
20965 * andl (cf-ct),dest 1:
20966 * addl ct,dest
20968 * Size 20. Size 14.
20970 * This is reasonably steep, but branch mispredict costs are
20971 * high on modern cpus, so consider failing only if optimizing
20972 * for space.
20975 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20976 && BRANCH_COST (optimize_insn_for_speed_p (),
20977 false) >= 2)
20979 if (cf == 0)
20981 machine_mode cmp_mode = GET_MODE (op0);
20982 enum rtx_code new_code;
20984 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20986 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20988 /* We may be reversing unordered compare to normal compare,
20989 that is not valid in general (we may convert non-trapping
20990 condition to trapping one), however on i386 we currently
20991 emit all comparisons unordered. */
20992 new_code = reverse_condition_maybe_unordered (code);
20994 else
20996 new_code = ix86_reverse_condition (code, cmp_mode);
20997 if (compare_code != UNKNOWN && new_code != UNKNOWN)
20998 compare_code = reverse_condition (compare_code);
21001 if (new_code != UNKNOWN)
21003 cf = ct;
21004 ct = 0;
21005 code = new_code;
21009 if (compare_code != UNKNOWN)
21011 /* notl op1 (if needed)
21012 sarl $31, op1
21013 andl (cf-ct), op1
21014 addl ct, op1
21016 For x < 0 (resp. x <= -1) there will be no notl,
21017 so if possible swap the constants to get rid of the
21018 complement.
21019 True/false will be -1/0 while code below (store flag
21020 followed by decrement) is 0/-1, so the constants need
21021 to be exchanged once more. */
21023 if (compare_code == GE || !cf)
21025 code = reverse_condition (code);
21026 compare_code = LT;
21028 else
21029 std::swap (ct, cf);
21031 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21033 else
21035 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21037 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21038 constm1_rtx,
21039 copy_rtx (out), 1, OPTAB_DIRECT);
21042 out = expand_simple_binop (mode, AND, copy_rtx (out),
21043 gen_int_mode (cf - ct, mode),
21044 copy_rtx (out), 1, OPTAB_DIRECT);
21045 if (ct)
21046 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21047 copy_rtx (out), 1, OPTAB_DIRECT);
21048 if (!rtx_equal_p (out, operands[0]))
21049 emit_move_insn (operands[0], copy_rtx (out));
21051 return true;
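/* Illustrative identity (not from the original source) behind the
   sar/and/add sequence above, for 32-bit operands:

     (x < 0 ? A : B)  ==  (((int) x >> 31) & (A - B)) + B

   since the arithmetic shift yields -1 for negative x and 0 otherwise.
   The store-flag path builds a similar 0/-1 mask from setcc followed by a
   decrement, with the constants swapped to compensate. */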
21055 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21057 /* Try a few things more with specific constants and a variable. */
21059 optab op;
21060 rtx var, orig_out, out, tmp;
21062 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21063 return false;
21065 /* If one of the two operands is an interesting constant, load a
21066 constant with the above and mask it in with a logical operation. */
21068 if (CONST_INT_P (operands[2]))
21070 var = operands[3];
21071 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21072 operands[3] = constm1_rtx, op = and_optab;
21073 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21074 operands[3] = const0_rtx, op = ior_optab;
21075 else
21076 return false;
21078 else if (CONST_INT_P (operands[3]))
21080 var = operands[2];
21081 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21082 operands[2] = constm1_rtx, op = and_optab;
21083 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
21084 operands[2] = const0_rtx, op = ior_optab;
21085 else
21086 return false;
21088 else
21089 return false;
21091 orig_out = operands[0];
21092 tmp = gen_reg_rtx (mode);
21093 operands[0] = tmp;
21095 /* Recurse to get the constant loaded. */
21096 if (ix86_expand_int_movcc (operands) == 0)
21097 return false;
21099 /* Mask in the interesting variable. */
21100 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21101 OPTAB_WIDEN);
21102 if (!rtx_equal_p (out, orig_out))
21103 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21105 return true;
21109 * For comparison with above,
21111 * movl cf,dest
21112 * movl ct,tmp
21113 * cmpl op1,op2
21114 * cmovcc tmp,dest
21116 * Size 15.
21119 if (! nonimmediate_operand (operands[2], mode))
21120 operands[2] = force_reg (mode, operands[2]);
21121 if (! nonimmediate_operand (operands[3], mode))
21122 operands[3] = force_reg (mode, operands[3]);
21124 if (! register_operand (operands[2], VOIDmode)
21125 && (mode == QImode
21126 || ! register_operand (operands[3], VOIDmode)))
21127 operands[2] = force_reg (mode, operands[2]);
21129 if (mode == QImode
21130 && ! register_operand (operands[3], VOIDmode))
21131 operands[3] = force_reg (mode, operands[3]);
21133 emit_insn (compare_seq);
21134 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21135 gen_rtx_IF_THEN_ELSE (mode,
21136 compare_op, operands[2],
21137 operands[3])));
21138 return true;
21141 /* Swap, force into registers, or otherwise massage the two operands
21142 to an sse comparison with a mask result. Thus we differ a bit from
21143 ix86_prepare_fp_compare_args which expects to produce a flags result.
21145 The DEST operand exists to help determine whether to commute commutative
21146 operators. The POP0/POP1 operands are updated in place. The new
21147 comparison code is returned, or UNKNOWN if not implementable. */
21149 static enum rtx_code
21150 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21151 rtx *pop0, rtx *pop1)
21153 switch (code)
21155 case LTGT:
21156 case UNEQ:
21157 /* AVX supports all the needed comparisons. */
21158 if (TARGET_AVX)
21159 break;
21160 /* We have no LTGT as an operator. We could implement it with
21161 NE & ORDERED, but this requires an extra temporary. It's
21162 not clear that it's worth it. */
21163 return UNKNOWN;
21165 case LT:
21166 case LE:
21167 case UNGT:
21168 case UNGE:
21169 /* These are supported directly. */
21170 break;
21172 case EQ:
21173 case NE:
21174 case UNORDERED:
21175 case ORDERED:
21176 /* AVX has 3 operand comparisons, no need to swap anything. */
21177 if (TARGET_AVX)
21178 break;
21179 /* For commutative operators, try to canonicalize the destination
21180 operand to be first in the comparison - this helps reload to
21181 avoid extra moves. */
21182 if (!dest || !rtx_equal_p (dest, *pop1))
21183 break;
21184 /* FALLTHRU */
21186 case GE:
21187 case GT:
21188 case UNLE:
21189 case UNLT:
21190 /* These are not supported directly before AVX, and furthermore
21191 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21192 comparison operands to transform into something that is
21193 supported. */
21194 std::swap (*pop0, *pop1);
21195 code = swap_condition (code);
21196 break;
21198 default:
21199 gcc_unreachable ();
21202 return code;
21205 /* Detect conditional moves that exactly match min/max operational
21206 semantics. Note that this is IEEE safe, as long as we don't
21207 interchange the operands.
21209 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21210 and TRUE if the operation is successful and instructions are emitted. */
21212 static bool
21213 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21214 rtx cmp_op1, rtx if_true, rtx if_false)
21216 machine_mode mode;
21217 bool is_min;
21218 rtx tmp;
21220 if (code == LT)
21222 else if (code == UNGE)
21223 std::swap (if_true, if_false);
21224 else
21225 return false;
21227 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21228 is_min = true;
21229 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21230 is_min = false;
21231 else
21232 return false;
21234 mode = GET_MODE (dest);
21236 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21237 but MODE may be a vector mode and thus not appropriate. */
21238 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21240 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21241 rtvec v;
21243 if_true = force_reg (mode, if_true);
21244 v = gen_rtvec (2, if_true, if_false);
21245 tmp = gen_rtx_UNSPEC (mode, v, u);
21247 else
21249 code = is_min ? SMIN : SMAX;
21250 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21253 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
21254 return true;
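/* Background sketch (not part of the original source): the SMIN/SMAX
   patterns emitted above map onto minss/minps-style instructions, which
   compute "a < b ? a : b" per element and return the second source
   operand when an input is a NaN or when both inputs are zero. That
   asymmetry is why the operands must not be interchanged, and why the
   UNSPEC_IEEE_MIN/MAX form is used unless -ffinite-math-only and
   -funsafe-math-optimizations are both in effect. */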
21257 /* Expand an sse vector comparison. Return the register with the result. */
21259 static rtx
21260 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21261 rtx op_true, rtx op_false)
21263 machine_mode mode = GET_MODE (dest);
21264 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21266 /* In the general case the result of the comparison can differ from the operands' type. */
21267 machine_mode cmp_mode;
21269 /* In AVX512F the result of comparison is an integer mask. */
21270 bool maskcmp = false;
21271 rtx x;
21273 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21275 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21276 gcc_assert (cmp_mode != BLKmode);
21278 maskcmp = true;
21280 else
21281 cmp_mode = cmp_ops_mode;
21284 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21285 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21286 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21288 if (optimize
21289 || reg_overlap_mentioned_p (dest, op_true)
21290 || reg_overlap_mentioned_p (dest, op_false))
21291 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21293 /* Compare patterns for int modes are unspec in AVX512F only. */
21294 if (maskcmp && (code == GT || code == EQ))
21296 rtx (*gen)(rtx, rtx, rtx);
21298 switch (cmp_ops_mode)
21300 case V64QImode:
21301 gcc_assert (TARGET_AVX512BW);
21302 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
21303 break;
21304 case V32HImode:
21305 gcc_assert (TARGET_AVX512BW);
21306 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
21307 break;
21308 case V16SImode:
21309 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21310 break;
21311 case V8DImode:
21312 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21313 break;
21314 default:
21315 gen = NULL;
21318 if (gen)
21320 emit_insn (gen (dest, cmp_op0, cmp_op1));
21321 return dest;
21324 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21326 if (cmp_mode != mode && !maskcmp)
21328 x = force_reg (cmp_ops_mode, x);
21329 convert_move (dest, x, false);
21331 else
21332 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21334 return dest;
21337 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21338 operations. This is used for both scalar and vector conditional moves. */
21340 static void
21341 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21343 machine_mode mode = GET_MODE (dest);
21344 machine_mode cmpmode = GET_MODE (cmp);
21346 /* In AVX512F the result of comparison is an integer mask. */
21347 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21349 rtx t2, t3, x;
21351 if (vector_all_ones_operand (op_true, mode)
21352 && rtx_equal_p (op_false, CONST0_RTX (mode))
21353 && !maskcmp)
21355 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
21357 else if (op_false == CONST0_RTX (mode)
21358 && !maskcmp)
21360 op_true = force_reg (mode, op_true);
21361 x = gen_rtx_AND (mode, cmp, op_true);
21362 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21364 else if (op_true == CONST0_RTX (mode)
21365 && !maskcmp)
21367 op_false = force_reg (mode, op_false);
21368 x = gen_rtx_NOT (mode, cmp);
21369 x = gen_rtx_AND (mode, x, op_false);
21370 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21372 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21373 && !maskcmp)
21375 op_false = force_reg (mode, op_false);
21376 x = gen_rtx_IOR (mode, cmp, op_false);
21377 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21379 else if (TARGET_XOP
21380 && !maskcmp)
21382 op_true = force_reg (mode, op_true);
21384 if (!nonimmediate_operand (op_false, mode))
21385 op_false = force_reg (mode, op_false);
21387 emit_insn (gen_rtx_SET (mode, dest,
21388 gen_rtx_IF_THEN_ELSE (mode, cmp,
21389 op_true,
21390 op_false)));
21392 else
21394 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21395 rtx d = dest;
21397 if (!nonimmediate_operand (op_true, mode))
21398 op_true = force_reg (mode, op_true);
21400 op_false = force_reg (mode, op_false);
21402 switch (mode)
21404 case V4SFmode:
21405 if (TARGET_SSE4_1)
21406 gen = gen_sse4_1_blendvps;
21407 break;
21408 case V2DFmode:
21409 if (TARGET_SSE4_1)
21410 gen = gen_sse4_1_blendvpd;
21411 break;
21412 case V16QImode:
21413 case V8HImode:
21414 case V4SImode:
21415 case V2DImode:
21416 if (TARGET_SSE4_1)
21418 gen = gen_sse4_1_pblendvb;
21419 if (mode != V16QImode)
21420 d = gen_reg_rtx (V16QImode);
21421 op_false = gen_lowpart (V16QImode, op_false);
21422 op_true = gen_lowpart (V16QImode, op_true);
21423 cmp = gen_lowpart (V16QImode, cmp);
21425 break;
21426 case V8SFmode:
21427 if (TARGET_AVX)
21428 gen = gen_avx_blendvps256;
21429 break;
21430 case V4DFmode:
21431 if (TARGET_AVX)
21432 gen = gen_avx_blendvpd256;
21433 break;
21434 case V32QImode:
21435 case V16HImode:
21436 case V8SImode:
21437 case V4DImode:
21438 if (TARGET_AVX2)
21440 gen = gen_avx2_pblendvb;
21441 if (mode != V32QImode)
21442 d = gen_reg_rtx (V32QImode);
21443 op_false = gen_lowpart (V32QImode, op_false);
21444 op_true = gen_lowpart (V32QImode, op_true);
21445 cmp = gen_lowpart (V32QImode, cmp);
21447 break;
21449 case V64QImode:
21450 gen = gen_avx512bw_blendmv64qi;
21451 break;
21452 case V32HImode:
21453 gen = gen_avx512bw_blendmv32hi;
21454 break;
21455 case V16SImode:
21456 gen = gen_avx512f_blendmv16si;
21457 break;
21458 case V8DImode:
21459 gen = gen_avx512f_blendmv8di;
21460 break;
21461 case V8DFmode:
21462 gen = gen_avx512f_blendmv8df;
21463 break;
21464 case V16SFmode:
21465 gen = gen_avx512f_blendmv16sf;
21466 break;
21468 default:
21469 break;
21472 if (gen != NULL)
21474 emit_insn (gen (d, op_false, op_true, cmp));
21475 if (d != dest)
21476 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21478 else
21480 op_true = force_reg (mode, op_true);
21482 t2 = gen_reg_rtx (mode);
21483 if (optimize)
21484 t3 = gen_reg_rtx (mode);
21485 else
21486 t3 = dest;
21488 x = gen_rtx_AND (mode, op_true, cmp);
21489 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21491 x = gen_rtx_NOT (mode, cmp);
21492 x = gen_rtx_AND (mode, x, op_false);
21493 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21495 x = gen_rtx_IOR (mode, t3, t2);
21496 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
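/* A minimal sketch (not from the original source) of the fallback just
   above: with cmp holding all-ones in selected lanes and zeros elsewhere,

     t2   = op_true  &  cmp;        (pand)
     t3   = op_false & ~cmp;        (pandn)
     dest = t2 | t3;                (por)

   which matches what a variable blend (blendvps/pblendvb) does in one
   step on SSE4.1 and later when the mask lanes are all-ones or all-zero. */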
21501 /* Expand a floating-point conditional move. Return true if successful. */
21503 bool
21504 ix86_expand_fp_movcc (rtx operands[])
21506 machine_mode mode = GET_MODE (operands[0]);
21507 enum rtx_code code = GET_CODE (operands[1]);
21508 rtx tmp, compare_op;
21509 rtx op0 = XEXP (operands[1], 0);
21510 rtx op1 = XEXP (operands[1], 1);
21512 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21514 machine_mode cmode;
21516 /* Since we've no cmove for sse registers, don't force bad register
21517 allocation just to gain access to it. Deny movcc when the
21518 comparison mode doesn't match the move mode. */
21519 cmode = GET_MODE (op0);
21520 if (cmode == VOIDmode)
21521 cmode = GET_MODE (op1);
21522 if (cmode != mode)
21523 return false;
21525 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21526 if (code == UNKNOWN)
21527 return false;
21529 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21530 operands[2], operands[3]))
21531 return true;
21533 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21534 operands[2], operands[3]);
21535 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21536 return true;
21539 if (GET_MODE (op0) == TImode
21540 || (GET_MODE (op0) == DImode
21541 && !TARGET_64BIT))
21542 return false;
21544 /* The floating point conditional move instructions don't directly
21545 support conditions resulting from a signed integer comparison. */
21547 compare_op = ix86_expand_compare (code, op0, op1);
21548 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21550 tmp = gen_reg_rtx (QImode);
21551 ix86_expand_setcc (tmp, code, op0, op1);
21553 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21556 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21557 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21558 operands[2], operands[3])));
21560 return true;
21563 /* Expand a floating-point vector conditional move; a vcond operation
21564 rather than a movcc operation. */
21566 bool
21567 ix86_expand_fp_vcond (rtx operands[])
21569 enum rtx_code code = GET_CODE (operands[3]);
21570 rtx cmp;
21572 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21573 &operands[4], &operands[5]);
21574 if (code == UNKNOWN)
21576 rtx temp;
21577 switch (GET_CODE (operands[3]))
21579 case LTGT:
21580 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21581 operands[5], operands[0], operands[0]);
21582 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21583 operands[5], operands[1], operands[2]);
21584 code = AND;
21585 break;
21586 case UNEQ:
21587 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21588 operands[5], operands[0], operands[0]);
21589 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21590 operands[5], operands[1], operands[2]);
21591 code = IOR;
21592 break;
21593 default:
21594 gcc_unreachable ();
21596 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21597 OPTAB_DIRECT);
21598 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21599 return true;
21602 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21603 operands[5], operands[1], operands[2]))
21604 return true;
21606 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21607 operands[1], operands[2]);
21608 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21609 return true;
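/* Worked expansion (not part of the original source) of the LTGT and UNEQ
   fallbacks above, writing the comparison masks as per-lane booleans:

     LTGT:  mask = ORDERED (a, b)   & NE (a, b)
     UNEQ:  mask = UNORDERED (a, b) | EQ (a, b)

   "Less-than-or-greater-than" is an ordered inequality and
   "unordered-or-equal" is its complement, so each is built from two
   directly supported comparisons combined with AND or IOR. */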
21612 /* Expand a signed/unsigned integral vector conditional move. */
21614 bool
21615 ix86_expand_int_vcond (rtx operands[])
21617 machine_mode data_mode = GET_MODE (operands[0]);
21618 machine_mode mode = GET_MODE (operands[4]);
21619 enum rtx_code code = GET_CODE (operands[3]);
21620 bool negate = false;
21621 rtx x, cop0, cop1;
21623 cop0 = operands[4];
21624 cop1 = operands[5];
21626 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21627 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21628 if ((code == LT || code == GE)
21629 && data_mode == mode
21630 && cop1 == CONST0_RTX (mode)
21631 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21632 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21633 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21634 && (GET_MODE_SIZE (data_mode) == 16
21635 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21637 rtx negop = operands[2 - (code == LT)];
21638 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21639 if (negop == CONST1_RTX (data_mode))
21641 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21642 operands[0], 1, OPTAB_DIRECT);
21643 if (res != operands[0])
21644 emit_move_insn (operands[0], res);
21645 return true;
21647 else if (GET_MODE_INNER (data_mode) != DImode
21648 && vector_all_ones_operand (negop, data_mode))
21650 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21651 operands[0], 0, OPTAB_DIRECT);
21652 if (res != operands[0])
21653 emit_move_insn (operands[0], res);
21654 return true;
21658 if (!nonimmediate_operand (cop1, mode))
21659 cop1 = force_reg (mode, cop1);
21660 if (!general_operand (operands[1], data_mode))
21661 operands[1] = force_reg (data_mode, operands[1]);
21662 if (!general_operand (operands[2], data_mode))
21663 operands[2] = force_reg (data_mode, operands[2]);
21665 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21666 if (TARGET_XOP
21667 && (mode == V16QImode || mode == V8HImode
21668 || mode == V4SImode || mode == V2DImode))
21670 else
21672 /* Canonicalize the comparison to EQ, GT, GTU. */
21673 switch (code)
21675 case EQ:
21676 case GT:
21677 case GTU:
21678 break;
21680 case NE:
21681 case LE:
21682 case LEU:
21683 code = reverse_condition (code);
21684 negate = true;
21685 break;
21687 case GE:
21688 case GEU:
21689 code = reverse_condition (code);
21690 negate = true;
21691 /* FALLTHRU */
21693 case LT:
21694 case LTU:
21695 std::swap (cop0, cop1);
21696 code = swap_condition (code);
21697 break;
21699 default:
21700 gcc_unreachable ();
21703 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21704 if (mode == V2DImode)
21706 switch (code)
21708 case EQ:
21709 /* SSE4.1 supports EQ. */
21710 if (!TARGET_SSE4_1)
21711 return false;
21712 break;
21714 case GT:
21715 case GTU:
21716 /* SSE4.2 supports GT/GTU. */
21717 if (!TARGET_SSE4_2)
21718 return false;
21719 break;
21721 default:
21722 gcc_unreachable ();
21726 /* Unsigned parallel compare is not supported by the hardware.
21727 Play some tricks to turn this into a signed comparison
21728 against 0. */
21729 if (code == GTU)
21731 cop0 = force_reg (mode, cop0);
21733 switch (mode)
21735 case V16SImode:
21736 case V8DImode:
21737 case V8SImode:
21738 case V4DImode:
21739 case V4SImode:
21740 case V2DImode:
21742 rtx t1, t2, mask;
21743 rtx (*gen_sub3) (rtx, rtx, rtx);
21745 switch (mode)
21747 case V16SImode: gen_sub3 = gen_subv16si3; break;
21748 case V8DImode: gen_sub3 = gen_subv8di3; break;
21749 case V8SImode: gen_sub3 = gen_subv8si3; break;
21750 case V4DImode: gen_sub3 = gen_subv4di3; break;
21751 case V4SImode: gen_sub3 = gen_subv4si3; break;
21752 case V2DImode: gen_sub3 = gen_subv2di3; break;
21753 default:
21754 gcc_unreachable ();
21756 /* Subtract (-(INT MAX) - 1) from both operands to make
21757 them signed. */
21758 mask = ix86_build_signbit_mask (mode, true, false);
21759 t1 = gen_reg_rtx (mode);
21760 emit_insn (gen_sub3 (t1, cop0, mask));
21762 t2 = gen_reg_rtx (mode);
21763 emit_insn (gen_sub3 (t2, cop1, mask));
21765 cop0 = t1;
21766 cop1 = t2;
21767 code = GT;
21769 break;
21771 case V64QImode:
21772 case V32HImode:
21773 case V32QImode:
21774 case V16HImode:
21775 case V16QImode:
21776 case V8HImode:
21777 /* Perform a parallel unsigned saturating subtraction. */
21778 x = gen_reg_rtx (mode);
21779 emit_insn (gen_rtx_SET (VOIDmode, x,
21780 gen_rtx_US_MINUS (mode, cop0, cop1)));
21782 cop0 = x;
21783 cop1 = CONST0_RTX (mode);
21784 code = EQ;
21785 negate = !negate;
21786 break;
21788 default:
21789 gcc_unreachable ();
21794 /* Allow the comparison to be done in one mode, but the movcc to
21795 happen in another mode. */
21796 if (data_mode == mode)
21798 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21799 operands[1+negate], operands[2-negate]);
21801 else
21803 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21804 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21805 operands[1+negate], operands[2-negate]);
21806 if (GET_MODE (x) == mode)
21807 x = gen_lowpart (data_mode, x);
21810 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21811 operands[2-negate]);
21812 return true;
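/* Illustrative identities (not from the original source) behind the GTU
   handling above. For element sizes that only have signed compares,
   flipping the sign bit of each element turns an unsigned compare into a
   signed one:

     a >u b   is equivalent to   (a ^ SIGNBIT) >s (b ^ SIGNBIT)

   and subtracting the sign-bit mask, as done above, flips that bit the
   same way modulo 2^N. For byte and word elements, unsigned saturating
   subtraction gives

     a >u b   is equivalent to   SUBUS (a, b) != 0

   and the "!= 0" is expressed as an EQ compare with the negate flag
   toggled. */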
21815 /* AVX512F does support 64-byte integer vector operations,
21816 thus the longest vector we are faced with is V64QImode. */
21817 #define MAX_VECT_LEN 64
21819 struct expand_vec_perm_d
21821 rtx target, op0, op1;
21822 unsigned char perm[MAX_VECT_LEN];
21823 machine_mode vmode;
21824 unsigned char nelt;
21825 bool one_operand_p;
21826 bool testing_p;
21829 static bool
21830 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21831 struct expand_vec_perm_d *d)
21833 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21834 expander, so args are either in d, or in op0, op1 etc. */
21835 machine_mode mode = GET_MODE (d ? d->op0 : op0);
21836 machine_mode maskmode = mode;
21837 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21839 switch (mode)
21841 case V8HImode:
21842 if (TARGET_AVX512VL && TARGET_AVX512BW)
21843 gen = gen_avx512vl_vpermi2varv8hi3;
21844 break;
21845 case V16HImode:
21846 if (TARGET_AVX512VL && TARGET_AVX512BW)
21847 gen = gen_avx512vl_vpermi2varv16hi3;
21848 break;
21849 case V64QImode:
21850 if (TARGET_AVX512VBMI)
21851 gen = gen_avx512bw_vpermi2varv64qi3;
21852 break;
21853 case V32HImode:
21854 if (TARGET_AVX512BW)
21855 gen = gen_avx512bw_vpermi2varv32hi3;
21856 break;
21857 case V4SImode:
21858 if (TARGET_AVX512VL)
21859 gen = gen_avx512vl_vpermi2varv4si3;
21860 break;
21861 case V8SImode:
21862 if (TARGET_AVX512VL)
21863 gen = gen_avx512vl_vpermi2varv8si3;
21864 break;
21865 case V16SImode:
21866 if (TARGET_AVX512F)
21867 gen = gen_avx512f_vpermi2varv16si3;
21868 break;
21869 case V4SFmode:
21870 if (TARGET_AVX512VL)
21872 gen = gen_avx512vl_vpermi2varv4sf3;
21873 maskmode = V4SImode;
21875 break;
21876 case V8SFmode:
21877 if (TARGET_AVX512VL)
21879 gen = gen_avx512vl_vpermi2varv8sf3;
21880 maskmode = V8SImode;
21882 break;
21883 case V16SFmode:
21884 if (TARGET_AVX512F)
21886 gen = gen_avx512f_vpermi2varv16sf3;
21887 maskmode = V16SImode;
21889 break;
21890 case V2DImode:
21891 if (TARGET_AVX512VL)
21892 gen = gen_avx512vl_vpermi2varv2di3;
21893 break;
21894 case V4DImode:
21895 if (TARGET_AVX512VL)
21896 gen = gen_avx512vl_vpermi2varv4di3;
21897 break;
21898 case V8DImode:
21899 if (TARGET_AVX512F)
21900 gen = gen_avx512f_vpermi2varv8di3;
21901 break;
21902 case V2DFmode:
21903 if (TARGET_AVX512VL)
21905 gen = gen_avx512vl_vpermi2varv2df3;
21906 maskmode = V2DImode;
21908 break;
21909 case V4DFmode:
21910 if (TARGET_AVX512VL)
21912 gen = gen_avx512vl_vpermi2varv4df3;
21913 maskmode = V4DImode;
21915 break;
21916 case V8DFmode:
21917 if (TARGET_AVX512F)
21919 gen = gen_avx512f_vpermi2varv8df3;
21920 maskmode = V8DImode;
21922 break;
21923 default:
21924 break;
21927 if (gen == NULL)
21928 return false;
21930 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21931 expander, so args are either in d, or in op0, op1 etc. */
21932 if (d)
21934 rtx vec[64];
21935 target = d->target;
21936 op0 = d->op0;
21937 op1 = d->op1;
21938 for (int i = 0; i < d->nelt; ++i)
21939 vec[i] = GEN_INT (d->perm[i]);
21940 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
21943 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
21944 return true;
21947 /* Expand a variable vector permutation. */
21949 void
21950 ix86_expand_vec_perm (rtx operands[])
21952 rtx target = operands[0];
21953 rtx op0 = operands[1];
21954 rtx op1 = operands[2];
21955 rtx mask = operands[3];
21956 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21957 machine_mode mode = GET_MODE (op0);
21958 machine_mode maskmode = GET_MODE (mask);
21959 int w, e, i;
21960 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21962 /* Number of elements in the vector. */
21963 w = GET_MODE_NUNITS (mode);
21964 e = GET_MODE_UNIT_SIZE (mode);
21965 gcc_assert (w <= 64);
21967 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
21968 return;
21970 if (TARGET_AVX2)
21972 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21974 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21975 a constant shuffle operand. With a tiny bit of effort we can
21976 use VPERMD instead. A re-interpretation stall for V4DFmode is
21977 unfortunate but there's no avoiding it.
21978 Similarly, V16HImode has no instructions for variable shuffling,
21979 while for V32QImode we can, after preparing suitable masks, use
21980 vpshufb; vpshufb; vpermq; vpor. */
21982 if (mode == V16HImode)
21984 maskmode = mode = V32QImode;
21985 w = 32;
21986 e = 1;
21988 else
21990 maskmode = mode = V8SImode;
21991 w = 8;
21992 e = 4;
21994 t1 = gen_reg_rtx (maskmode);
21996 /* Replicate the low bits of the V4DImode mask into V8SImode:
21997 mask = { A B C D }
21998 t1 = { A A B B C C D D }. */
21999 for (i = 0; i < w / 2; ++i)
22000 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
22001 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22002 vt = force_reg (maskmode, vt);
22003 mask = gen_lowpart (maskmode, mask);
22004 if (maskmode == V8SImode)
22005 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22006 else
22007 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22009 /* Multiply the shuffle indices by two. */
22010 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22011 OPTAB_DIRECT);
22013 /* Add one to the odd shuffle indices:
22014 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22015 for (i = 0; i < w / 2; ++i)
22017 vec[i * 2] = const0_rtx;
22018 vec[i * 2 + 1] = const1_rtx;
22020 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22021 vt = validize_mem (force_const_mem (maskmode, vt));
22022 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22023 OPTAB_DIRECT);
22025 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22026 operands[3] = mask = t1;
22027 target = gen_reg_rtx (mode);
22028 op0 = gen_lowpart (mode, op0);
22029 op1 = gen_lowpart (mode, op1);
22032 switch (mode)
22034 case V8SImode:
22035 /* The VPERMD and VPERMPS instructions already properly ignore
22036 the high bits of the shuffle elements. No need for us to
22037 perform an AND ourselves. */
22038 if (one_operand_shuffle)
22040 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22041 if (target != operands[0])
22042 emit_move_insn (operands[0],
22043 gen_lowpart (GET_MODE (operands[0]), target));
22045 else
22047 t1 = gen_reg_rtx (V8SImode);
22048 t2 = gen_reg_rtx (V8SImode);
22049 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22050 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22051 goto merge_two;
22053 return;
22055 case V8SFmode:
22056 mask = gen_lowpart (V8SImode, mask);
22057 if (one_operand_shuffle)
22058 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22059 else
22061 t1 = gen_reg_rtx (V8SFmode);
22062 t2 = gen_reg_rtx (V8SFmode);
22063 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22064 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22065 goto merge_two;
22067 return;
22069 case V4SImode:
22070 /* By combining the two 128-bit input vectors into one 256-bit
22071 input vector, we can use VPERMD and VPERMPS for the full
22072 two-operand shuffle. */
22073 t1 = gen_reg_rtx (V8SImode);
22074 t2 = gen_reg_rtx (V8SImode);
22075 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22076 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22077 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22078 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22079 return;
22081 case V4SFmode:
22082 t1 = gen_reg_rtx (V8SFmode);
22083 t2 = gen_reg_rtx (V8SImode);
22084 mask = gen_lowpart (V4SImode, mask);
22085 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22086 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22087 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22088 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
22089 return;
22091 case V32QImode:
22092 t1 = gen_reg_rtx (V32QImode);
22093 t2 = gen_reg_rtx (V32QImode);
22094 t3 = gen_reg_rtx (V32QImode);
22095 vt2 = GEN_INT (-128);
22096 for (i = 0; i < 32; i++)
22097 vec[i] = vt2;
22098 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22099 vt = force_reg (V32QImode, vt);
22100 for (i = 0; i < 32; i++)
22101 vec[i] = i < 16 ? vt2 : const0_rtx;
22102 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22103 vt2 = force_reg (V32QImode, vt2);
22104 /* From mask create two adjusted masks, which contain the same
22105 bits as mask in the low 7 bits of each vector element.
22106 The first mask will have the most significant bit clear
22107 if it requests element from the same 128-bit lane
22108 and MSB set if it requests element from the other 128-bit lane.
22109 The second mask will have the opposite values of the MSB,
22110 and additionally will have its 128-bit lanes swapped.
22111 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22112 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22113 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22114 stands for the other 12 bytes. */
22115 /* The bit that tells whether an element comes from the same lane or the
22116 other lane is bit 4, so shift it up by 3 to the MSB position. */
22117 t5 = gen_reg_rtx (V4DImode);
22118 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22119 GEN_INT (3)));
22120 /* Clear MSB bits from the mask just in case it had them set. */
22121 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22122 /* After this t1 will have MSB set for elements from the other lane. */
22123 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22124 /* Clear bits other than MSB. */
22125 emit_insn (gen_andv32qi3 (t1, t1, vt));
22126 /* Or in the lower bits from mask into t3. */
22127 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22128 /* And invert MSB bits in t1, so MSB is set for elements from the same
22129 lane. */
22130 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22131 /* Swap 128-bit lanes in t3. */
22132 t6 = gen_reg_rtx (V4DImode);
22133 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22134 const2_rtx, GEN_INT (3),
22135 const0_rtx, const1_rtx));
22136 /* And or in the lower bits from mask into t1. */
22137 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22138 if (one_operand_shuffle)
22140 /* Each of these shuffles will put 0s in places where
22141 element from the other 128-bit lane is needed, otherwise
22142 will shuffle in the requested value. */
22143 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22144 gen_lowpart (V32QImode, t6)));
22145 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22146 /* For t3 the 128-bit lanes are swapped again. */
22147 t7 = gen_reg_rtx (V4DImode);
22148 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22149 const2_rtx, GEN_INT (3),
22150 const0_rtx, const1_rtx));
22151 /* And oring both together leads to the result. */
22152 emit_insn (gen_iorv32qi3 (target, t1,
22153 gen_lowpart (V32QImode, t7)));
22154 if (target != operands[0])
22155 emit_move_insn (operands[0],
22156 gen_lowpart (GET_MODE (operands[0]), target));
22157 return;
22160 t4 = gen_reg_rtx (V32QImode);
22161 /* Similar to the one_operand_shuffle code above, just
22162 repeated twice, once for each operand. The merge_two:
22163 code will merge the two results together. */
22164 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22165 gen_lowpart (V32QImode, t6)));
22166 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22167 gen_lowpart (V32QImode, t6)));
22168 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22169 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22170 t7 = gen_reg_rtx (V4DImode);
22171 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22172 const2_rtx, GEN_INT (3),
22173 const0_rtx, const1_rtx));
22174 t8 = gen_reg_rtx (V4DImode);
22175 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22176 const2_rtx, GEN_INT (3),
22177 const0_rtx, const1_rtx));
22178 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22179 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22180 t1 = t4;
22181 t2 = t3;
22182 goto merge_two;
22184 default:
22185 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22186 break;
22190 if (TARGET_XOP)
22192 /* The XOP VPPERM insn supports three inputs. By ignoring the
22193 one_operand_shuffle special case, we avoid creating another
22194 set of constant vectors in memory. */
22195 one_operand_shuffle = false;
22197 /* mask = mask & {2*w-1, ...} */
22198 vt = GEN_INT (2*w - 1);
22200 else
22202 /* mask = mask & {w-1, ...} */
22203 vt = GEN_INT (w - 1);
22206 for (i = 0; i < w; i++)
22207 vec[i] = vt;
22208 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22209 mask = expand_simple_binop (maskmode, AND, mask, vt,
22210 NULL_RTX, 0, OPTAB_DIRECT);
22212 /* For non-QImode operations, convert the word permutation control
22213 into a byte permutation control. */
22214 if (mode != V16QImode)
22216 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22217 GEN_INT (exact_log2 (e)),
22218 NULL_RTX, 0, OPTAB_DIRECT);
22220 /* Convert mask to vector of chars. */
22221 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22223 /* Replicate each of the input bytes into byte positions:
22224 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22225 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22226 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22227 for (i = 0; i < 16; ++i)
22228 vec[i] = GEN_INT (i/e * e);
22229 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22230 vt = validize_mem (force_const_mem (V16QImode, vt));
22231 if (TARGET_XOP)
22232 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22233 else
22234 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22236 /* Convert it into the byte positions by doing
22237 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22238 for (i = 0; i < 16; ++i)
22239 vec[i] = GEN_INT (i % e);
22240 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22241 vt = validize_mem (force_const_mem (V16QImode, vt));
22242 emit_insn (gen_addv16qi3 (mask, mask, vt));
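/* Worked example (not part of the original source) of the word-to-byte
   control conversion above for a V4SImode shuffle (e == 4): an element
   mask of { 2, 0, 3, 1 } is first scaled to { 8, 0, 12, 4 }, the pshufb
   replicates each low byte across its element, and adding { 0,1,2,3, ... }
   produces the byte-level control

     { 8,9,10,11,  0,1,2,3,  12,13,14,15,  4,5,6,7 }. */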
22245 /* The actual shuffle operations all operate on V16QImode. */
22246 op0 = gen_lowpart (V16QImode, op0);
22247 op1 = gen_lowpart (V16QImode, op1);
22249 if (TARGET_XOP)
22251 if (GET_MODE (target) != V16QImode)
22252 target = gen_reg_rtx (V16QImode);
22253 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22254 if (target != operands[0])
22255 emit_move_insn (operands[0],
22256 gen_lowpart (GET_MODE (operands[0]), target));
22258 else if (one_operand_shuffle)
22260 if (GET_MODE (target) != V16QImode)
22261 target = gen_reg_rtx (V16QImode);
22262 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22263 if (target != operands[0])
22264 emit_move_insn (operands[0],
22265 gen_lowpart (GET_MODE (operands[0]), target));
22267 else
22269 rtx xops[6];
22270 bool ok;
22272 /* Shuffle the two input vectors independently. */
22273 t1 = gen_reg_rtx (V16QImode);
22274 t2 = gen_reg_rtx (V16QImode);
22275 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22276 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22278 merge_two:
22279 /* Then merge them together. The key is whether any given control
22280 element contained a bit set that indicates the second word. */
22281 mask = operands[3];
22282 vt = GEN_INT (w);
22283 if (maskmode == V2DImode && !TARGET_SSE4_1)
22285 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
22286 more shuffle to convert the V2DI input mask into a V4SI
22287 input mask. At that point the masking done through
22288 ix86_expand_int_vcond will work as desired. */
22289 rtx t3 = gen_reg_rtx (V4SImode);
22290 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22291 const0_rtx, const0_rtx,
22292 const2_rtx, const2_rtx));
22293 mask = t3;
22294 maskmode = V4SImode;
22295 e = w = 4;
22298 for (i = 0; i < w; i++)
22299 vec[i] = vt;
22300 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22301 vt = force_reg (maskmode, vt);
22302 mask = expand_simple_binop (maskmode, AND, mask, vt,
22303 NULL_RTX, 0, OPTAB_DIRECT);
22305 if (GET_MODE (target) != mode)
22306 target = gen_reg_rtx (mode);
22307 xops[0] = target;
22308 xops[1] = gen_lowpart (mode, t2);
22309 xops[2] = gen_lowpart (mode, t1);
22310 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22311 xops[4] = mask;
22312 xops[5] = vt;
22313 ok = ix86_expand_int_vcond (xops);
22314 gcc_assert (ok);
22315 if (target != operands[0])
22316 emit_move_insn (operands[0],
22317 gen_lowpart (GET_MODE (operands[0]), target));
22321 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
22322 true if we should do zero extension, else sign extension. HIGH_P is
22323 true if we want the N/2 high elements, else the low elements. */
22325 void
22326 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22328 machine_mode imode = GET_MODE (src);
22329 rtx tmp;
22331 if (TARGET_SSE4_1)
22333 rtx (*unpack)(rtx, rtx);
22334 rtx (*extract)(rtx, rtx) = NULL;
22335 machine_mode halfmode = BLKmode;
22337 switch (imode)
22339 case V64QImode:
22340 if (unsigned_p)
22341 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22342 else
22343 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22344 halfmode = V32QImode;
22345 extract
22346 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22347 break;
22348 case V32QImode:
22349 if (unsigned_p)
22350 unpack = gen_avx2_zero_extendv16qiv16hi2;
22351 else
22352 unpack = gen_avx2_sign_extendv16qiv16hi2;
22353 halfmode = V16QImode;
22354 extract
22355 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22356 break;
22357 case V32HImode:
22358 if (unsigned_p)
22359 unpack = gen_avx512f_zero_extendv16hiv16si2;
22360 else
22361 unpack = gen_avx512f_sign_extendv16hiv16si2;
22362 halfmode = V16HImode;
22363 extract
22364 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22365 break;
22366 case V16HImode:
22367 if (unsigned_p)
22368 unpack = gen_avx2_zero_extendv8hiv8si2;
22369 else
22370 unpack = gen_avx2_sign_extendv8hiv8si2;
22371 halfmode = V8HImode;
22372 extract
22373 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22374 break;
22375 case V16SImode:
22376 if (unsigned_p)
22377 unpack = gen_avx512f_zero_extendv8siv8di2;
22378 else
22379 unpack = gen_avx512f_sign_extendv8siv8di2;
22380 halfmode = V8SImode;
22381 extract
22382 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22383 break;
22384 case V8SImode:
22385 if (unsigned_p)
22386 unpack = gen_avx2_zero_extendv4siv4di2;
22387 else
22388 unpack = gen_avx2_sign_extendv4siv4di2;
22389 halfmode = V4SImode;
22390 extract
22391 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22392 break;
22393 case V16QImode:
22394 if (unsigned_p)
22395 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22396 else
22397 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22398 break;
22399 case V8HImode:
22400 if (unsigned_p)
22401 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22402 else
22403 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22404 break;
22405 case V4SImode:
22406 if (unsigned_p)
22407 unpack = gen_sse4_1_zero_extendv2siv2di2;
22408 else
22409 unpack = gen_sse4_1_sign_extendv2siv2di2;
22410 break;
22411 default:
22412 gcc_unreachable ();
22415 if (GET_MODE_SIZE (imode) >= 32)
22417 tmp = gen_reg_rtx (halfmode);
22418 emit_insn (extract (tmp, src));
22420 else if (high_p)
22422 /* Shift higher 8 bytes to lower 8 bytes. */
22423 tmp = gen_reg_rtx (V1TImode);
22424 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22425 GEN_INT (64)));
22426 tmp = gen_lowpart (imode, tmp);
22428 else
22429 tmp = src;
22431 emit_insn (unpack (dest, tmp));
22433 else
22435 rtx (*unpack)(rtx, rtx, rtx);
22437 switch (imode)
22439 case V16QImode:
22440 if (high_p)
22441 unpack = gen_vec_interleave_highv16qi;
22442 else
22443 unpack = gen_vec_interleave_lowv16qi;
22444 break;
22445 case V8HImode:
22446 if (high_p)
22447 unpack = gen_vec_interleave_highv8hi;
22448 else
22449 unpack = gen_vec_interleave_lowv8hi;
22450 break;
22451 case V4SImode:
22452 if (high_p)
22453 unpack = gen_vec_interleave_highv4si;
22454 else
22455 unpack = gen_vec_interleave_lowv4si;
22456 break;
22457 default:
22458 gcc_unreachable ();
22461 if (unsigned_p)
22462 tmp = force_reg (imode, CONST0_RTX (imode));
22463 else
22464 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22465 src, pc_rtx, pc_rtx);
22467 rtx tmp2 = gen_reg_rtx (imode);
22468 emit_insn (unpack (tmp2, src, tmp));
22469 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
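/* A minimal sketch (not from the original source) of the pre-SSE4.1 path
   above, for the signed low-half V16QImode -> V8HImode case:

     signs = pcmpgtb (zero, src)      all-ones where src is negative
     dest  = punpcklbw (src, signs)   interleave value and sign bytes

   Zero extension interleaves with an all-zero vector instead, and the
   high-half variants use the corresponding "interleave high" insns. */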
22473 /* Expand conditional increment or decrement using adc/sbb instructions.
22474 The default case using setcc followed by the conditional move can be
22475 done by generic code. */
22476 bool
22477 ix86_expand_int_addcc (rtx operands[])
22479 enum rtx_code code = GET_CODE (operands[1]);
22480 rtx flags;
22481 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22482 rtx compare_op;
22483 rtx val = const0_rtx;
22484 bool fpcmp = false;
22485 machine_mode mode;
22486 rtx op0 = XEXP (operands[1], 0);
22487 rtx op1 = XEXP (operands[1], 1);
22489 if (operands[3] != const1_rtx
22490 && operands[3] != constm1_rtx)
22491 return false;
22492 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22493 return false;
22494 code = GET_CODE (compare_op);
22496 flags = XEXP (compare_op, 0);
22498 if (GET_MODE (flags) == CCFPmode
22499 || GET_MODE (flags) == CCFPUmode)
22501 fpcmp = true;
22502 code = ix86_fp_compare_code_to_integer (code);
22505 if (code != LTU)
22507 val = constm1_rtx;
22508 if (fpcmp)
22509 PUT_CODE (compare_op,
22510 reverse_condition_maybe_unordered
22511 (GET_CODE (compare_op)));
22512 else
22513 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22516 mode = GET_MODE (operands[0]);
22518 /* Construct either adc or sbb insn. */
22519 if ((code == LTU) == (operands[3] == constm1_rtx))
22521 switch (mode)
22523 case QImode:
22524 insn = gen_subqi3_carry;
22525 break;
22526 case HImode:
22527 insn = gen_subhi3_carry;
22528 break;
22529 case SImode:
22530 insn = gen_subsi3_carry;
22531 break;
22532 case DImode:
22533 insn = gen_subdi3_carry;
22534 break;
22535 default:
22536 gcc_unreachable ();
22539 else
22541 switch (mode)
22543 case QImode:
22544 insn = gen_addqi3_carry;
22545 break;
22546 case HImode:
22547 insn = gen_addhi3_carry;
22548 break;
22549 case SImode:
22550 insn = gen_addsi3_carry;
22551 break;
22552 case DImode:
22553 insn = gen_adddi3_carry;
22554 break;
22555 default:
22556 gcc_unreachable ();
22559 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22561 return true;
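/* Illustrative use (not part of the original source) of the adc/sbb forms
   constructed above: for unsigned A and B, a conditional increment such as

     x += (A < B);

   becomes a compare followed by adc with a zero immediate: the compare
   leaves A < B in the carry flag and adc folds it into the addition. The
   conditional decrement form uses sbb instead, so no setcc or conditional
   move is needed. */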
22565 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22566 but works for floating point parameters and non-offsettable memories.
22567 For pushes, it returns just stack offsets; the values will be saved
22568 in the right order. At most four parts are generated. */
22570 static int
22571 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22573 int size;
22575 if (!TARGET_64BIT)
22576 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22577 else
22578 size = (GET_MODE_SIZE (mode) + 4) / 8;
22580 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22581 gcc_assert (size >= 2 && size <= 4);
22583 /* Optimize constant pool reference to immediates. This is used by fp
22584 moves, which force all constants to memory to allow combining. */
22585 if (MEM_P (operand) && MEM_READONLY_P (operand))
22587 rtx tmp = maybe_get_pool_constant (operand);
22588 if (tmp)
22589 operand = tmp;
22592 if (MEM_P (operand) && !offsettable_memref_p (operand))
22594 /* The only non-offsettable memories we handle are pushes. */
22595 int ok = push_operand (operand, VOIDmode);
22597 gcc_assert (ok);
22599 operand = copy_rtx (operand);
22600 PUT_MODE (operand, word_mode);
22601 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22602 return size;
22605 if (GET_CODE (operand) == CONST_VECTOR)
22607 machine_mode imode = int_mode_for_mode (mode);
22608 /* Caution: if we looked through a constant pool memory above,
22609 the operand may actually have a different mode now. That's
22610 ok, since we want to pun this all the way back to an integer. */
22611 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22612 gcc_assert (operand != NULL);
22613 mode = imode;
22616 if (!TARGET_64BIT)
22618 if (mode == DImode)
22619 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22620 else
22622 int i;
22624 if (REG_P (operand))
22626 gcc_assert (reload_completed);
22627 for (i = 0; i < size; i++)
22628 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22630 else if (offsettable_memref_p (operand))
22632 operand = adjust_address (operand, SImode, 0);
22633 parts[0] = operand;
22634 for (i = 1; i < size; i++)
22635 parts[i] = adjust_address (operand, SImode, 4 * i);
22637 else if (CONST_DOUBLE_P (operand))
22639 REAL_VALUE_TYPE r;
22640 long l[4];
22642 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22643 switch (mode)
22645 case TFmode:
22646 real_to_target (l, &r, mode);
22647 parts[3] = gen_int_mode (l[3], SImode);
22648 parts[2] = gen_int_mode (l[2], SImode);
22649 break;
22650 case XFmode:
22651 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22652 long double may not be 80-bit. */
22653 real_to_target (l, &r, mode);
22654 parts[2] = gen_int_mode (l[2], SImode);
22655 break;
22656 case DFmode:
22657 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22658 break;
22659 default:
22660 gcc_unreachable ();
22662 parts[1] = gen_int_mode (l[1], SImode);
22663 parts[0] = gen_int_mode (l[0], SImode);
22665 else
22666 gcc_unreachable ();
22669 else
22671 if (mode == TImode)
22672 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22673 if (mode == XFmode || mode == TFmode)
22675 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22676 if (REG_P (operand))
22678 gcc_assert (reload_completed);
22679 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22680 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22682 else if (offsettable_memref_p (operand))
22684 operand = adjust_address (operand, DImode, 0);
22685 parts[0] = operand;
22686 parts[1] = adjust_address (operand, upper_mode, 8);
22688 else if (CONST_DOUBLE_P (operand))
22690 REAL_VALUE_TYPE r;
22691 long l[4];
22693 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22694 real_to_target (l, &r, mode);
22696 /* real_to_target puts 32-bit pieces in each long. */
22697 parts[0] =
22698 gen_int_mode
22699 ((l[0] & (HOST_WIDE_INT) 0xffffffff)
22700 | ((l[1] & (HOST_WIDE_INT) 0xffffffff) << 32),
22701 DImode);
22703 if (upper_mode == SImode)
22704 parts[1] = gen_int_mode (l[2], SImode);
22705 else
22706 parts[1] =
22707 gen_int_mode
22708 ((l[2] & (HOST_WIDE_INT) 0xffffffff)
22709 | ((l[3] & (HOST_WIDE_INT) 0xffffffff) << 32),
22710 DImode);
22712 else
22713 gcc_unreachable ();
22717 return size;
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   All required insns are emitted here; the split destination parts are
   placed in operands[2] upward and the source parts in operands[6] upward,
   in the correct order, before the component moves are emitted.  */
22725 void
22726 ix86_split_long_move (rtx operands[])
22728 rtx part[2][4];
22729 int nparts, i, j;
22730 int push = 0;
22731 int collisions = 0;
22732 machine_mode mode = GET_MODE (operands[0]);
22733 bool collisionparts[4];
/* The DFmode expanders may ask us to move a double.
   For a 64-bit target this is a single move.  By hiding that fact here
   we simplify the i386.md splitters.  */
22738 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
/* Optimize constant pool references to immediates.  This is used by
   fp moves, which force all constants to memory to allow combining.  */
22743 if (MEM_P (operands[1])
22744 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22745 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22746 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22747 if (push_operand (operands[0], VOIDmode))
22749 operands[0] = copy_rtx (operands[0]);
22750 PUT_MODE (operands[0], word_mode);
22752 else
22753 operands[0] = gen_lowpart (DImode, operands[0]);
22754 operands[1] = gen_lowpart (DImode, operands[1]);
22755 emit_move_insn (operands[0], operands[1]);
22756 return;
22759 /* The only non-offsettable memory we handle is push. */
22760 if (push_operand (operands[0], VOIDmode))
22761 push = 1;
22762 else
22763 gcc_assert (!MEM_P (operands[0])
22764 || offsettable_memref_p (operands[0]));
22766 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22767 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
/* When emitting a push, take care of source operands that live on the stack.  */
22770 if (push && MEM_P (operands[1])
22771 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22773 rtx src_base = XEXP (part[1][nparts - 1], 0);
22775 /* Compensate for the stack decrement by 4. */
22776 if (!TARGET_64BIT && nparts == 3
22777 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22778 src_base = plus_constant (Pmode, src_base, 4);
/* src_base refers to the stack pointer and is
   automatically decreased by the emitted pushes.  */
22782 for (i = 0; i < nparts; i++)
22783 part[1][i] = change_address (part[1][i],
22784 GET_MODE (part[1][i]), src_base);
/* We need to do the copy in the right order in case an address register
   of the source overlaps the destination.  */
22789 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22791 rtx tmp;
22793 for (i = 0; i < nparts; i++)
22795 collisionparts[i]
22796 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22797 if (collisionparts[i])
22798 collisions++;
22801 /* Collision in the middle part can be handled by reordering. */
22802 if (collisions == 1 && nparts == 3 && collisionparts [1])
22804 std::swap (part[0][1], part[0][2]);
22805 std::swap (part[1][1], part[1][2]);
22807 else if (collisions == 1
22808 && nparts == 4
22809 && (collisionparts [1] || collisionparts [2]))
22811 if (collisionparts [1])
22813 std::swap (part[0][1], part[0][2]);
22814 std::swap (part[1][1], part[1][2]);
22816 else
22818 std::swap (part[0][2], part[0][3]);
22819 std::swap (part[1][2], part[1][3]);
/* If there are more collisions, we can't handle them by reordering.
   Do an lea into the last part and use only one colliding move.  */
22825 else if (collisions > 1)
22827 rtx base;
22829 collisions = 1;
22831 base = part[0][nparts - 1];
22833 /* Handle the case when the last part isn't valid for lea.
22834 Happens in 64-bit mode storing the 12-byte XFmode. */
22835 if (GET_MODE (base) != Pmode)
22836 base = gen_rtx_REG (Pmode, REGNO (base));
22838 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22839 part[1][0] = replace_equiv_address (part[1][0], base);
22840 for (i = 1; i < nparts; i++)
22842 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22843 part[1][i] = replace_equiv_address (part[1][i], tmp);
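/* An illustrative sketch (not from the original comments) of the collision
   handling above: suppose a 32-bit target splits a multi-word load whose
   destination registers include %eax while the source memory is addressed
   through %eax as well.  Copying into %eax first would clobber the address
   still needed for the remaining words, so a single collision is fixed by
   reordering the part moves, and several collisions are fixed by first
   materializing the source address with an lea into the last destination
   register and addressing every source word relative to it, which leaves
   only one colliding move.  */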
22848 if (push)
22850 if (!TARGET_64BIT)
22852 if (nparts == 3)
22854 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22855 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22856 stack_pointer_rtx, GEN_INT (-4)));
22857 emit_move_insn (part[0][2], part[1][2]);
22859 else if (nparts == 4)
22861 emit_move_insn (part[0][3], part[1][3]);
22862 emit_move_insn (part[0][2], part[1][2]);
22865 else
/* In 64-bit mode we don't have a 32-bit push available.  If the operand
   is a register, that is OK - we just use the larger counterpart.  We also
   retype memory references - this comes from an attempt to avoid a REX
   prefix when moving the second half of a TFmode value.  */
22871 if (GET_MODE (part[1][1]) == SImode)
22873 switch (GET_CODE (part[1][1]))
22875 case MEM:
22876 part[1][1] = adjust_address (part[1][1], DImode, 0);
22877 break;
22879 case REG:
22880 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22881 break;
22883 default:
22884 gcc_unreachable ();
22887 if (GET_MODE (part[1][0]) == SImode)
22888 part[1][0] = part[1][1];
22891 emit_move_insn (part[0][1], part[1][1]);
22892 emit_move_insn (part[0][0], part[1][0]);
22893 return;
/* Choose the correct order so that the source is not overwritten before it is copied.  */
22897 if ((REG_P (part[0][0])
22898 && REG_P (part[1][1])
22899 && (REGNO (part[0][0]) == REGNO (part[1][1])
22900 || (nparts == 3
22901 && REGNO (part[0][0]) == REGNO (part[1][2]))
22902 || (nparts == 4
22903 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22904 || (collisions > 0
22905 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22907 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22909 operands[2 + i] = part[0][j];
22910 operands[6 + i] = part[1][j];
22913 else
22915 for (i = 0; i < nparts; i++)
22917 operands[2 + i] = part[0][i];
22918 operands[6 + i] = part[1][i];
22922 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22923 if (optimize_insn_for_size_p ())
22925 for (j = 0; j < nparts - 1; j++)
22926 if (CONST_INT_P (operands[6 + j])
22927 && operands[6 + j] != const0_rtx
22928 && REG_P (operands[2 + j]))
22929 for (i = j; i < nparts - 1; i++)
22930 if (CONST_INT_P (operands[7 + i])
22931 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22932 operands[7 + i] = operands[2 + j];
22935 for (i = 0; i < nparts; i++)
22936 emit_move_insn (operands[2 + i], operands[6 + i]);
22938 return;
22941 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22942 left shift by a constant, either using a single shift or
22943 a sequence of add instructions. */
22945 static void
22946 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
22948 rtx (*insn)(rtx, rtx, rtx);
22950 if (count == 1
22951 || (count * ix86_cost->add <= ix86_cost->shift_const
22952 && !optimize_insn_for_size_p ()))
22954 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22955 while (count-- > 0)
22956 emit_insn (insn (operand, operand, operand));
22958 else
22960 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22961 emit_insn (insn (operand, operand, GEN_INT (count)));
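/* A minimal illustrative sketch (not part of the compiler) of the
   add-vs-shift choice made by ix86_expand_ashl_const above: when COUNT is 1,
   or when COUNT * add cost <= constant-shift cost, the shift of a half-word
   is emitted as repeated additions, since adding a value to itself is a
   1-bit left shift:

     unsigned int
     shl_via_adds (unsigned int x, int count)
     {
       while (count-- > 0)
         x += x;            each doubling shifts left by one bit
       return x;
     }

   so "x << 2" becomes two add instructions instead of one shift.  */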
22965 void
22966 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
22968 rtx (*gen_ashl3)(rtx, rtx, rtx);
22969 rtx (*gen_shld)(rtx, rtx, rtx);
22970 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22972 rtx low[2], high[2];
22973 int count;
22975 if (CONST_INT_P (operands[2]))
22977 split_double_mode (mode, operands, 2, low, high);
22978 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22980 if (count >= half_width)
22982 emit_move_insn (high[0], low[1]);
22983 emit_move_insn (low[0], const0_rtx);
22985 if (count > half_width)
22986 ix86_expand_ashl_const (high[0], count - half_width, mode);
22988 else
22990 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22992 if (!rtx_equal_p (operands[0], operands[1]))
22993 emit_move_insn (operands[0], operands[1]);
22995 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22996 ix86_expand_ashl_const (low[0], count, mode);
22998 return;
23001 split_double_mode (mode, operands, 1, low, high);
23003 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23005 if (operands[1] == const1_rtx)
/* Assuming we've chosen QImode-capable registers, 1 << N
   can be done with two 32/64-bit shifts, no branches, no cmoves.  */
23009 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23011 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23013 ix86_expand_clear (low[0]);
23014 ix86_expand_clear (high[0]);
23015 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23017 d = gen_lowpart (QImode, low[0]);
23018 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23019 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23020 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23022 d = gen_lowpart (QImode, high[0]);
23023 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23024 s = gen_rtx_NE (QImode, flags, const0_rtx);
23025 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23028 /* Otherwise, we can get the same results by manually performing
23029 a bit extract operation on bit 5/6, and then performing the two
23030 shifts. The two methods of getting 0/1 into low/high are exactly
23031 the same size. Avoiding the shift in the bit extract case helps
23032 pentium4 a bit; no one else seems to care much either way. */
23033 else
23035 machine_mode half_mode;
23036 rtx (*gen_lshr3)(rtx, rtx, rtx);
23037 rtx (*gen_and3)(rtx, rtx, rtx);
23038 rtx (*gen_xor3)(rtx, rtx, rtx);
23039 HOST_WIDE_INT bits;
23040 rtx x;
23042 if (mode == DImode)
23044 half_mode = SImode;
23045 gen_lshr3 = gen_lshrsi3;
23046 gen_and3 = gen_andsi3;
23047 gen_xor3 = gen_xorsi3;
23048 bits = 5;
23050 else
23052 half_mode = DImode;
23053 gen_lshr3 = gen_lshrdi3;
23054 gen_and3 = gen_anddi3;
23055 gen_xor3 = gen_xordi3;
23056 bits = 6;
23059 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23060 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23061 else
23062 x = gen_lowpart (half_mode, operands[2]);
23063 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
23065 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23066 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23067 emit_move_insn (low[0], high[0]);
23068 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23071 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23072 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23073 return;
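/* Roughly, the branch-free sequence generated above for a double-word
   "1 << n" (DImode on 32-bit targets, TImode on 64-bit ones) behaves like
   the following sketch, where HALF is 32 or 64:

     lo = (n & HALF) == 0;      materialized via sete on the tested flag
     hi = (n & HALF) != 0;      materialized via setne
     lo <<= n;                  hardware shifts mask the count to HALF-1 bits
     hi <<= n;

   e.g. for HALF == 32 and n == 35, lo becomes 0 and hi becomes 1 << 3,
   which together represent 1 << 35.  This is only an illustration of the
   expansion; the actual insns are emitted by the code above.  */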
23076 if (operands[1] == constm1_rtx)
23078 /* For -1 << N, we can avoid the shld instruction, because we
23079 know that we're shifting 0...31/63 ones into a -1. */
23080 emit_move_insn (low[0], constm1_rtx);
23081 if (optimize_insn_for_size_p ())
23082 emit_move_insn (high[0], low[0]);
23083 else
23084 emit_move_insn (high[0], constm1_rtx);
23086 else
23088 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23090 if (!rtx_equal_p (operands[0], operands[1]))
23091 emit_move_insn (operands[0], operands[1]);
23093 split_double_mode (mode, operands, 1, low, high);
23094 emit_insn (gen_shld (high[0], low[0], operands[2]));
23097 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23099 if (TARGET_CMOVE && scratch)
23101 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23102 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23104 ix86_expand_clear (scratch);
23105 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23107 else
23109 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23110 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23112 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
23116 void
23117 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23119 rtx (*gen_ashr3)(rtx, rtx, rtx)
23120 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23121 rtx (*gen_shrd)(rtx, rtx, rtx);
23122 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23124 rtx low[2], high[2];
23125 int count;
23127 if (CONST_INT_P (operands[2]))
23129 split_double_mode (mode, operands, 2, low, high);
23130 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23132 if (count == GET_MODE_BITSIZE (mode) - 1)
23134 emit_move_insn (high[0], high[1]);
23135 emit_insn (gen_ashr3 (high[0], high[0],
23136 GEN_INT (half_width - 1)));
23137 emit_move_insn (low[0], high[0]);
23140 else if (count >= half_width)
23142 emit_move_insn (low[0], high[1]);
23143 emit_move_insn (high[0], low[0]);
23144 emit_insn (gen_ashr3 (high[0], high[0],
23145 GEN_INT (half_width - 1)));
23147 if (count > half_width)
23148 emit_insn (gen_ashr3 (low[0], low[0],
23149 GEN_INT (count - half_width)));
23151 else
23153 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23155 if (!rtx_equal_p (operands[0], operands[1]))
23156 emit_move_insn (operands[0], operands[1]);
23158 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23159 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23162 else
23164 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23166 if (!rtx_equal_p (operands[0], operands[1]))
23167 emit_move_insn (operands[0], operands[1]);
23169 split_double_mode (mode, operands, 1, low, high);
23171 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23172 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23174 if (TARGET_CMOVE && scratch)
23176 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23177 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23179 emit_move_insn (scratch, high[0]);
23180 emit_insn (gen_ashr3 (scratch, scratch,
23181 GEN_INT (half_width - 1)));
23182 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23183 scratch));
23185 else
23187 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23188 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23190 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
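/* Worked example (illustrative only) of the constant-count cases handled by
   ix86_split_ashr above, for a DImode arithmetic shift on a 32-bit target:

     count == 63:  high = old_high >> 31 (pure sign mask), low = high;
     count >= 32:  low = old_high >> (count - 32), high = old_high >> 31;
     count <  32:  shrd low, high, count;  sar high, count.

   The variable-count path emits the shrd/sar pair and then fixes up the
   result when the runtime count is 32 or more, either with cmov (when a
   scratch register is available) or with a small conditional sequence.  */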
23195 void
23196 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23198 rtx (*gen_lshr3)(rtx, rtx, rtx)
23199 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23200 rtx (*gen_shrd)(rtx, rtx, rtx);
23201 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23203 rtx low[2], high[2];
23204 int count;
23206 if (CONST_INT_P (operands[2]))
23208 split_double_mode (mode, operands, 2, low, high);
23209 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23211 if (count >= half_width)
23213 emit_move_insn (low[0], high[1]);
23214 ix86_expand_clear (high[0]);
23216 if (count > half_width)
23217 emit_insn (gen_lshr3 (low[0], low[0],
23218 GEN_INT (count - half_width)));
23220 else
23222 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23224 if (!rtx_equal_p (operands[0], operands[1]))
23225 emit_move_insn (operands[0], operands[1]);
23227 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23228 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23231 else
23233 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23235 if (!rtx_equal_p (operands[0], operands[1]))
23236 emit_move_insn (operands[0], operands[1]);
23238 split_double_mode (mode, operands, 1, low, high);
23240 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23241 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23243 if (TARGET_CMOVE && scratch)
23245 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23246 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23248 ix86_expand_clear (scratch);
23249 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23250 scratch));
23252 else
23254 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23255 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23257 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23262 /* Predict just emitted jump instruction to be taken with probability PROB. */
23263 static void
23264 predict_jump (int prob)
23266 rtx insn = get_last_insn ();
23267 gcc_assert (JUMP_P (insn));
23268 add_int_reg_note (insn, REG_BR_PROB, prob);
/* Helper function for the string operations below.  Test whether VARIABLE
   is aligned to VALUE bytes.  If so, jump to the returned label.  */
23273 static rtx_code_label *
23274 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23276 rtx_code_label *label = gen_label_rtx ();
23277 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23278 if (GET_MODE (variable) == DImode)
23279 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23280 else
23281 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23282 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23283 1, label);
23284 if (epilogue)
23285 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23286 else
23287 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23288 return label;
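/* As an illustration, ix86_expand_aligntest (count, 4, true) emits roughly
   "tmp = count & 4; if (tmp == 0) goto label;" and returns the label; the
   jump is predicted taken 50% of the time in epilogues and 90% of the time
   in prologues via predict_jump above.  */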
/* Decrease COUNTREG by VALUE.  */
23292 static void
23293 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23295 rtx (*gen_add)(rtx, rtx, rtx)
23296 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23298 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23301 /* Zero extend possibly SImode EXP to Pmode register. */
23303 ix86_zero_extend_to_Pmode (rtx exp)
23305 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23308 /* Divide COUNTREG by SCALE. */
23309 static rtx
23310 scale_counter (rtx countreg, int scale)
23312 rtx sc;
23314 if (scale == 1)
23315 return countreg;
23316 if (CONST_INT_P (countreg))
23317 return GEN_INT (INTVAL (countreg) / scale);
23318 gcc_assert (REG_P (countreg));
23320 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23321 GEN_INT (exact_log2 (scale)),
23322 NULL, 1, OPTAB_DIRECT);
23323 return sc;
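/* For instance (illustrative only), a memcpy expanded with SImode
   "rep movs" and a byte count of 37 calls scale_counter with SCALE == 4:
   a constant count simply becomes GEN_INT (9), while a runtime count is
   shifted right logically by 2.  The remaining 37 % 4 bytes are left for
   the epilogue code elsewhere in this file.  */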
23326 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23327 DImode for constant loop counts. */
23329 static machine_mode
23330 counter_mode (rtx count_exp)
23332 if (GET_MODE (count_exp) != VOIDmode)
23333 return GET_MODE (count_exp);
23334 if (!CONST_INT_P (count_exp))
23335 return Pmode;
23336 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23337 return DImode;
23338 return SImode;
23341 /* Copy the address to a Pmode register. This is used for x32 to
23342 truncate DImode TLS address to a SImode register. */
23344 static rtx
23345 ix86_copy_addr_to_reg (rtx addr)
23347 rtx reg;
23348 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23350 reg = copy_addr_to_reg (addr);
23351 REG_POINTER (reg) = 1;
23352 return reg;
23354 else
23356 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23357 reg = copy_to_mode_reg (DImode, addr);
23358 REG_POINTER (reg) = 1;
23359 return gen_rtx_SUBREG (SImode, reg, 0);
/* When ISSETMEM is FALSE, output a simple loop to move the memory pointed to
   by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the overall
   size is COUNT, specified in bytes.  When ISSETMEM is TRUE, output the
   equivalent loop to fill the memory with VALUE (expected to be in MODE).

   The size is rounded down to a whole number of chunks moved at once.
   SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info.  */
23372 static void
23373 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23374 rtx destptr, rtx srcptr, rtx value,
23375 rtx count, machine_mode mode, int unroll,
23376 int expected_size, bool issetmem)
23378 rtx_code_label *out_label, *top_label;
23379 rtx iter, tmp;
23380 machine_mode iter_mode = counter_mode (count);
23381 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23382 rtx piece_size = GEN_INT (piece_size_n);
23383 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23384 rtx size;
23385 int i;
23387 top_label = gen_label_rtx ();
23388 out_label = gen_label_rtx ();
23389 iter = gen_reg_rtx (iter_mode);
23391 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23392 NULL, 1, OPTAB_DIRECT);
23393 /* Those two should combine. */
23394 if (piece_size == const1_rtx)
23396 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23397 true, out_label);
23398 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23400 emit_move_insn (iter, const0_rtx);
23402 emit_label (top_label);
23404 tmp = convert_modes (Pmode, iter_mode, iter, true);
/* This assert could be relaxed - in that case we'll need to compute the
   smallest power of two containing PIECE_SIZE_N and pass it to
   offset_address.  */
23409 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23410 destmem = offset_address (destmem, tmp, piece_size_n);
23411 destmem = adjust_address (destmem, mode, 0);
23413 if (!issetmem)
23415 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23416 srcmem = adjust_address (srcmem, mode, 0);
/* When unrolling for chips that reorder memory reads and writes,
   we can save registers by using a single temporary.
   Also, using 4 temporaries is overkill in 32-bit mode.  */
23421 if (!TARGET_64BIT && 0)
23423 for (i = 0; i < unroll; i++)
23425 if (i)
23427 destmem =
23428 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23429 srcmem =
23430 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23432 emit_move_insn (destmem, srcmem);
23435 else
23437 rtx tmpreg[4];
23438 gcc_assert (unroll <= 4);
23439 for (i = 0; i < unroll; i++)
23441 tmpreg[i] = gen_reg_rtx (mode);
23442 if (i)
23444 srcmem =
23445 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23447 emit_move_insn (tmpreg[i], srcmem);
23449 for (i = 0; i < unroll; i++)
23451 if (i)
23453 destmem =
23454 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23456 emit_move_insn (destmem, tmpreg[i]);
23460 else
23461 for (i = 0; i < unroll; i++)
23463 if (i)
23464 destmem =
23465 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23466 emit_move_insn (destmem, value);
23469 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23470 true, OPTAB_LIB_WIDEN);
23471 if (tmp != iter)
23472 emit_move_insn (iter, tmp);
23474 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23475 true, top_label);
23476 if (expected_size != -1)
23478 expected_size /= GET_MODE_SIZE (mode) * unroll;
23479 if (expected_size == 0)
23480 predict_jump (0);
23481 else if (expected_size > REG_BR_PROB_BASE)
23482 predict_jump (REG_BR_PROB_BASE - 1);
23483 else
23484 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23486 else
23487 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23488 iter = ix86_zero_extend_to_Pmode (iter);
23489 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23490 true, OPTAB_LIB_WIDEN);
23491 if (tmp != destptr)
23492 emit_move_insn (destptr, tmp);
23493 if (!issetmem)
23495 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23496 true, OPTAB_LIB_WIDEN);
23497 if (tmp != srcptr)
23498 emit_move_insn (srcptr, tmp);
23500 emit_label (out_label);
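/* Schematically (an illustrative sketch, not literal output), with
   MODE == SImode and UNROLL == 4 the loop emitted above looks like:

     size = count & ~15;                round down to whole 16-byte chunks
     iter = 0;
   top:
     load four SImode words from src + iter into temporaries;
     store the four temporaries to dest + iter;
     iter += 16;
     if (iter < size) goto top;
     dest += iter;  src += iter;        the leftover bytes are handled by
                                        the epilogue expanders below  */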
23503 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23504 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23505 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23507 ORIG_VALUE is the original value passed to memset to fill the memory with.
23508 Other arguments have same meaning as for previous function. */
23510 static void
23511 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23512 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23513 rtx count,
23514 machine_mode mode, bool issetmem)
23516 rtx destexp;
23517 rtx srcexp;
23518 rtx countreg;
23519 HOST_WIDE_INT rounded_count;
23521 /* If possible, it is shorter to use rep movs.
23522 TODO: Maybe it is better to move this logic to decide_alg. */
23523 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23524 && (!issetmem || orig_value == const0_rtx))
23525 mode = SImode;
23527 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23528 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23530 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23531 GET_MODE_SIZE (mode)));
23532 if (mode != QImode)
23534 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23535 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23536 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23538 else
23539 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23540 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23542 rounded_count = (INTVAL (count)
23543 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23544 destmem = shallow_copy_rtx (destmem);
23545 set_mem_size (destmem, rounded_count);
23547 else if (MEM_SIZE_KNOWN_P (destmem))
23548 clear_mem_size (destmem);
23550 if (issetmem)
23552 value = force_reg (mode, gen_lowpart (mode, value));
23553 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23555 else
23557 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23558 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23559 if (mode != QImode)
23561 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23562 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23563 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23565 else
23566 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23567 if (CONST_INT_P (count))
23569 rounded_count = (INTVAL (count)
23570 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23571 srcmem = shallow_copy_rtx (srcmem);
23572 set_mem_size (srcmem, rounded_count);
23574 else
23576 if (MEM_SIZE_KNOWN_P (srcmem))
23577 clear_mem_size (srcmem);
23579 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23580 destexp, srcexp));
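/* As an illustration (not literal output), a memcpy of a constant 64 bytes
   expanded here with MODE == SImode loads the count register with 64 / 4
   == 16 and emits a single "rep movsl"; DESTEXP and SRCEXP describe the
   pointer values after the operation (pointer + countreg * 4) so that the
   rep pattern exposes the correct dataflow to the rest of the compiler.  */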
/* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
   DESTMEM.
   SRCMEM is passed by pointer so it can be updated on return.
   The return value is the updated DST.  */
23588 static rtx
23589 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23590 HOST_WIDE_INT size_to_move)
23592 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23593 enum insn_code code;
23594 machine_mode move_mode;
23595 int piece_size, i;
/* Find the widest mode in which we could perform moves.
   Start with the biggest power of 2 no larger than SIZE_TO_MOVE and halve
   it until a move of that size is supported.  */
23600 piece_size = 1 << floor_log2 (size_to_move);
23601 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23602 code = optab_handler (mov_optab, move_mode);
23603 while (code == CODE_FOR_nothing && piece_size > 1)
23605 piece_size >>= 1;
23606 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23607 code = optab_handler (mov_optab, move_mode);
23610 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23611 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23612 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23614 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23615 move_mode = mode_for_vector (word_mode, nunits);
23616 code = optab_handler (mov_optab, move_mode);
23617 if (code == CODE_FOR_nothing)
23619 move_mode = word_mode;
23620 piece_size = GET_MODE_SIZE (move_mode);
23621 code = optab_handler (mov_optab, move_mode);
23624 gcc_assert (code != CODE_FOR_nothing);
23626 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23627 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
/* Emit moves.  We'll need SIZE_TO_MOVE / PIECE_SIZE moves.  */
23630 gcc_assert (size_to_move % piece_size == 0);
23631 adjust = GEN_INT (piece_size);
23632 for (i = 0; i < size_to_move; i += piece_size)
23634 /* We move from memory to memory, so we'll need to do it via
23635 a temporary register. */
23636 tempreg = gen_reg_rtx (move_mode);
23637 emit_insn (GEN_FCN (code) (tempreg, src));
23638 emit_insn (GEN_FCN (code) (dst, tempreg));
23640 emit_move_insn (destptr,
23641 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23642 emit_move_insn (srcptr,
23643 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23645 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23646 piece_size);
23647 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23648 piece_size);
23651 /* Update DST and SRC rtx. */
23652 *srcmem = src;
23653 return dst;
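/* For example (a sketch of typical behavior, not a guarantee), with
   SIZE_TO_MOVE == 16 on a 64-bit target the widest integer mode found is
   TImode; because that is wider than the word mode, the code above retries
   with the matching vector mode (V2DImode here) and, when such a move
   exists, copies the block with a single 16-byte load and store through a
   temporary register, otherwise falling back to two word-sized (DImode)
   moves.  */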
23656 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23657 static void
23658 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23659 rtx destptr, rtx srcptr, rtx count, int max_size)
23661 rtx src, dest;
23662 if (CONST_INT_P (count))
23664 HOST_WIDE_INT countval = INTVAL (count);
23665 HOST_WIDE_INT epilogue_size = countval % max_size;
23666 int i;
23668 /* For now MAX_SIZE should be a power of 2. This assert could be
23669 relaxed, but it'll require a bit more complicated epilogue
23670 expanding. */
23671 gcc_assert ((max_size & (max_size - 1)) == 0);
23672 for (i = max_size; i >= 1; i >>= 1)
23674 if (epilogue_size & i)
23675 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23677 return;
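/* As a worked example (illustration only): with MAX_SIZE == 16 and a
   constant COUNT whose remainder modulo 16 is 11 (binary 1011), the
   constant-count loop just above emits one 8-byte move, one 2-byte move and
   one 1-byte move - one move per set bit of the remainder, from the largest
   piece down.  */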
23679 if (max_size > 8)
23681 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23682 count, 1, OPTAB_DIRECT);
23683 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23684 count, QImode, 1, 4, false);
23685 return;
/* When single string operations are available, we can cheaply advance the
   dest and src pointers with them.  Otherwise we save code size by
   maintaining an offset (zero is readily available from the preceding rep
   operation) and using x86 addressing modes.  */
23692 if (TARGET_SINGLE_STRINGOP)
23694 if (max_size > 4)
23696 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23697 src = change_address (srcmem, SImode, srcptr);
23698 dest = change_address (destmem, SImode, destptr);
23699 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23700 emit_label (label);
23701 LABEL_NUSES (label) = 1;
23703 if (max_size > 2)
23705 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23706 src = change_address (srcmem, HImode, srcptr);
23707 dest = change_address (destmem, HImode, destptr);
23708 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23709 emit_label (label);
23710 LABEL_NUSES (label) = 1;
23712 if (max_size > 1)
23714 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23715 src = change_address (srcmem, QImode, srcptr);
23716 dest = change_address (destmem, QImode, destptr);
23717 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23718 emit_label (label);
23719 LABEL_NUSES (label) = 1;
23722 else
23724 rtx offset = force_reg (Pmode, const0_rtx);
23725 rtx tmp;
23727 if (max_size > 4)
23729 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23730 src = change_address (srcmem, SImode, srcptr);
23731 dest = change_address (destmem, SImode, destptr);
23732 emit_move_insn (dest, src);
23733 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23734 true, OPTAB_LIB_WIDEN);
23735 if (tmp != offset)
23736 emit_move_insn (offset, tmp);
23737 emit_label (label);
23738 LABEL_NUSES (label) = 1;
23740 if (max_size > 2)
23742 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23743 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23744 src = change_address (srcmem, HImode, tmp);
23745 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23746 dest = change_address (destmem, HImode, tmp);
23747 emit_move_insn (dest, src);
23748 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23749 true, OPTAB_LIB_WIDEN);
23750 if (tmp != offset)
23751 emit_move_insn (offset, tmp);
23752 emit_label (label);
23753 LABEL_NUSES (label) = 1;
23755 if (max_size > 1)
23757 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23758 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23759 src = change_address (srcmem, QImode, tmp);
23760 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23761 dest = change_address (destmem, QImode, tmp);
23762 emit_move_insn (dest, src);
23763 emit_label (label);
23764 LABEL_NUSES (label) = 1;
/* This function emits moves to fill SIZE_TO_MOVE bytes starting at DESTMEM
   with the value PROMOTED_VAL.
   The return value is the updated DST.  */
23773 static rtx
23774 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23775 HOST_WIDE_INT size_to_move)
23777 rtx dst = destmem, adjust;
23778 enum insn_code code;
23779 machine_mode move_mode;
23780 int piece_size, i;
/* Find the widest mode in which we could perform moves.
   Start with the biggest power of 2 no larger than SIZE_TO_MOVE and halve
   it until a move of that size is supported.  */
23785 move_mode = GET_MODE (promoted_val);
23786 if (move_mode == VOIDmode)
23787 move_mode = QImode;
23788 if (size_to_move < GET_MODE_SIZE (move_mode))
23790 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23791 promoted_val = gen_lowpart (move_mode, promoted_val);
23793 piece_size = GET_MODE_SIZE (move_mode);
23794 code = optab_handler (mov_optab, move_mode);
23795 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23797 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
/* Emit moves.  We'll need SIZE_TO_MOVE / PIECE_SIZE moves.  */
23800 gcc_assert (size_to_move % piece_size == 0);
23801 adjust = GEN_INT (piece_size);
23802 for (i = 0; i < size_to_move; i += piece_size)
23804 if (piece_size <= GET_MODE_SIZE (word_mode))
23806 emit_insn (gen_strset (destptr, dst, promoted_val));
23807 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23808 piece_size);
23809 continue;
23812 emit_insn (GEN_FCN (code) (dst, promoted_val));
23814 emit_move_insn (destptr,
23815 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23817 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23818 piece_size);
23821 /* Update DST rtx. */
23822 return dst;
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
23825 static void
23826 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23827 rtx count, int max_size)
23829 count =
23830 expand_simple_binop (counter_mode (count), AND, count,
23831 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23832 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23833 gen_lowpart (QImode, value), count, QImode,
23834 1, max_size / 2, true);
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
23838 static void
23839 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23840 rtx count, int max_size)
23842 rtx dest;
23844 if (CONST_INT_P (count))
23846 HOST_WIDE_INT countval = INTVAL (count);
23847 HOST_WIDE_INT epilogue_size = countval % max_size;
23848 int i;
23850 /* For now MAX_SIZE should be a power of 2. This assert could be
23851 relaxed, but it'll require a bit more complicated epilogue
23852 expanding. */
23853 gcc_assert ((max_size & (max_size - 1)) == 0);
23854 for (i = max_size; i >= 1; i >>= 1)
23856 if (epilogue_size & i)
23858 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23859 destmem = emit_memset (destmem, destptr, vec_value, i);
23860 else
23861 destmem = emit_memset (destmem, destptr, value, i);
23864 return;
23866 if (max_size > 32)
23868 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23869 return;
23871 if (max_size > 16)
23873 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23874 if (TARGET_64BIT)
23876 dest = change_address (destmem, DImode, destptr);
23877 emit_insn (gen_strset (destptr, dest, value));
23878 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23879 emit_insn (gen_strset (destptr, dest, value));
23881 else
23883 dest = change_address (destmem, SImode, destptr);
23884 emit_insn (gen_strset (destptr, dest, value));
23885 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23886 emit_insn (gen_strset (destptr, dest, value));
23887 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23888 emit_insn (gen_strset (destptr, dest, value));
23889 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23890 emit_insn (gen_strset (destptr, dest, value));
23892 emit_label (label);
23893 LABEL_NUSES (label) = 1;
23895 if (max_size > 8)
23897 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23898 if (TARGET_64BIT)
23900 dest = change_address (destmem, DImode, destptr);
23901 emit_insn (gen_strset (destptr, dest, value));
23903 else
23905 dest = change_address (destmem, SImode, destptr);
23906 emit_insn (gen_strset (destptr, dest, value));
23907 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23908 emit_insn (gen_strset (destptr, dest, value));
23910 emit_label (label);
23911 LABEL_NUSES (label) = 1;
23913 if (max_size > 4)
23915 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23916 dest = change_address (destmem, SImode, destptr);
23917 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23918 emit_label (label);
23919 LABEL_NUSES (label) = 1;
23921 if (max_size > 2)
23923 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23924 dest = change_address (destmem, HImode, destptr);
23925 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23926 emit_label (label);
23927 LABEL_NUSES (label) = 1;
23929 if (max_size > 1)
23931 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23932 dest = change_address (destmem, QImode, destptr);
23933 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23934 emit_label (label);
23935 LABEL_NUSES (label) = 1;
23939 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
23940 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23941 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23942 ignored.
23943 Return value is updated DESTMEM. */
23944 static rtx
23945 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23946 rtx destptr, rtx srcptr, rtx value,
23947 rtx vec_value, rtx count, int align,
23948 int desired_alignment, bool issetmem)
23950 int i;
23951 for (i = 1; i < desired_alignment; i <<= 1)
23953 if (align <= i)
23955 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
23956 if (issetmem)
23958 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23959 destmem = emit_memset (destmem, destptr, vec_value, i);
23960 else
23961 destmem = emit_memset (destmem, destptr, value, i);
23963 else
23964 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23965 ix86_adjust_counter (count, i);
23966 emit_label (label);
23967 LABEL_NUSES (label) = 1;
23968 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23971 return destmem;
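/* For instance (illustrative only), with ALIGN == 1 and
   DESIRED_ALIGNMENT == 8 the loop above emits three alignment tests on the
   destination pointer: if bit 0 is set, copy or set 1 byte; if bit 1 is
   set, 2 bytes; if bit 2 is set, 4 bytes - each time advancing the pointer
   and decreasing COUNT - so the main loop that follows starts on an
   8-byte-aligned destination.  */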
/* Test if COUNT & SIZE is nonzero and if so, expand a movmem
   or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
   and jump to DONE_LABEL.  */
23977 static void
23978 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23979 rtx destptr, rtx srcptr,
23980 rtx value, rtx vec_value,
23981 rtx count, int size,
23982 rtx done_label, bool issetmem)
23984 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
23985 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23986 rtx modesize;
23987 int n;
23989 /* If we do not have vector value to copy, we must reduce size. */
23990 if (issetmem)
23992 if (!vec_value)
23994 if (GET_MODE (value) == VOIDmode && size > 8)
23995 mode = Pmode;
23996 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23997 mode = GET_MODE (value);
23999 else
24000 mode = GET_MODE (vec_value), value = vec_value;
24002 else
24004 /* Choose appropriate vector mode. */
24005 if (size >= 32)
24006 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24007 else if (size >= 16)
24008 mode = TARGET_SSE ? V16QImode : DImode;
24009 srcmem = change_address (srcmem, mode, srcptr);
24011 destmem = change_address (destmem, mode, destptr);
24012 modesize = GEN_INT (GET_MODE_SIZE (mode));
24013 gcc_assert (GET_MODE_SIZE (mode) <= size);
24014 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24016 if (issetmem)
24017 emit_move_insn (destmem, gen_lowpart (mode, value));
24018 else
24020 emit_move_insn (destmem, srcmem);
24021 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24023 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24026 destmem = offset_address (destmem, count, 1);
24027 destmem = offset_address (destmem, GEN_INT (-2 * size),
24028 GET_MODE_SIZE (mode));
24029 if (!issetmem)
24031 srcmem = offset_address (srcmem, count, 1);
24032 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24033 GET_MODE_SIZE (mode));
24035 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24037 if (issetmem)
24038 emit_move_insn (destmem, gen_lowpart (mode, value));
24039 else
24041 emit_move_insn (destmem, srcmem);
24042 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24044 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24046 emit_jump_insn (gen_jump (done_label));
24047 emit_barrier ();
24049 emit_label (label);
24050 LABEL_NUSES (label) = 1;
/* Handle small memcpy (up to SIZE, which is supposed to be a small power
   of 2) and get ready for the main memcpy loop by copying the initial
   DESIRED_ALIGN-ALIGN bytes and the last SIZE bytes, adjusting
   DESTPTR/SRCPTR/COUNT in a way that lets us proceed with a loop copying
   SIZE bytes at once.  Do the moves in MODE.
   DONE_LABEL is a label after the whole copying sequence.  The label is
   created on demand if *DONE_LABEL is NULL.
   MIN_SIZE is the minimal size of the block copied.  This value gets
   adjusted for the new bounds after the initial copies.

   DESTMEM/SRCMEM are memory expressions pointing to the copied block,
   DESTPTR/SRCPTR are pointers to the block.  DYNAMIC_CHECK indicates
   whether we will dispatch to a library call for large blocks.
24066 In pseudocode we do:
24068 if (COUNT < SIZE)
24070 Assume that SIZE is 4. Bigger sizes are handled analogously
24071 if (COUNT & 4)
24073 copy 4 bytes from SRCPTR to DESTPTR
24074 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24075 goto done_label
24077 if (!COUNT)
24078 goto done_label;
24079 copy 1 byte from SRCPTR to DESTPTR
24080 if (COUNT & 2)
24082 copy 2 bytes from SRCPTR to DESTPTR
24083 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24086 else
24088 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24089 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
OLD_DESTPTR = DESTPTR;
Align DESTPTR up to DESIRED_ALIGN
SRCPTR += DESTPTR - OLD_DESTPTR
COUNT -= DESTPTR - OLD_DESTPTR
if (DYNAMIC_CHECK)
Round COUNT down to multiple of SIZE
<< optional caller supplied zero size guard is here >>
<< optional caller supplied dynamic check is here >>
24099 << caller supplied main copy loop is here >>
24101 done_label:
24103 static void
24104 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24105 rtx *destptr, rtx *srcptr,
24106 machine_mode mode,
24107 rtx value, rtx vec_value,
24108 rtx *count,
24109 rtx_code_label **done_label,
24110 int size,
24111 int desired_align,
24112 int align,
24113 unsigned HOST_WIDE_INT *min_size,
24114 bool dynamic_check,
24115 bool issetmem)
24117 rtx_code_label *loop_label = NULL, *label;
24118 int n;
24119 rtx modesize;
24120 int prolog_size = 0;
24121 rtx mode_value;
/* Choose the proper value to copy.  */
24124 if (issetmem && VECTOR_MODE_P (mode))
24125 mode_value = vec_value;
24126 else
24127 mode_value = value;
24128 gcc_assert (GET_MODE_SIZE (mode) <= size);
24130 /* See if block is big or small, handle small blocks. */
24131 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24133 int size2 = size;
24134 loop_label = gen_label_rtx ();
24136 if (!*done_label)
24137 *done_label = gen_label_rtx ();
24139 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24140 1, loop_label);
24141 size2 >>= 1;
24143 /* Handle sizes > 3. */
24144 for (;size2 > 2; size2 >>= 1)
24145 expand_small_movmem_or_setmem (destmem, srcmem,
24146 *destptr, *srcptr,
24147 value, vec_value,
24148 *count,
24149 size2, *done_label, issetmem);
24150 /* Nothing to copy? Jump to DONE_LABEL if so */
24151 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24152 1, *done_label);
24154 /* Do a byte copy. */
24155 destmem = change_address (destmem, QImode, *destptr);
24156 if (issetmem)
24157 emit_move_insn (destmem, gen_lowpart (QImode, value));
24158 else
24160 srcmem = change_address (srcmem, QImode, *srcptr);
24161 emit_move_insn (destmem, srcmem);
24164 /* Handle sizes 2 and 3. */
24165 label = ix86_expand_aligntest (*count, 2, false);
24166 destmem = change_address (destmem, HImode, *destptr);
24167 destmem = offset_address (destmem, *count, 1);
24168 destmem = offset_address (destmem, GEN_INT (-2), 2);
24169 if (issetmem)
24170 emit_move_insn (destmem, gen_lowpart (HImode, value));
24171 else
24173 srcmem = change_address (srcmem, HImode, *srcptr);
24174 srcmem = offset_address (srcmem, *count, 1);
24175 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24176 emit_move_insn (destmem, srcmem);
24179 emit_label (label);
24180 LABEL_NUSES (label) = 1;
24181 emit_jump_insn (gen_jump (*done_label));
24182 emit_barrier ();
24184 else
24185 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24186 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24188 /* Start memcpy for COUNT >= SIZE. */
24189 if (loop_label)
24191 emit_label (loop_label);
24192 LABEL_NUSES (loop_label) = 1;
24195 /* Copy first desired_align bytes. */
24196 if (!issetmem)
24197 srcmem = change_address (srcmem, mode, *srcptr);
24198 destmem = change_address (destmem, mode, *destptr);
24199 modesize = GEN_INT (GET_MODE_SIZE (mode));
24200 for (n = 0; prolog_size < desired_align - align; n++)
24202 if (issetmem)
24203 emit_move_insn (destmem, mode_value);
24204 else
24206 emit_move_insn (destmem, srcmem);
24207 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24209 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24210 prolog_size += GET_MODE_SIZE (mode);
24214 /* Copy last SIZE bytes. */
24215 destmem = offset_address (destmem, *count, 1);
24216 destmem = offset_address (destmem,
24217 GEN_INT (-size - prolog_size),
24219 if (issetmem)
24220 emit_move_insn (destmem, mode_value);
24221 else
24223 srcmem = offset_address (srcmem, *count, 1);
24224 srcmem = offset_address (srcmem,
24225 GEN_INT (-size - prolog_size),
24227 emit_move_insn (destmem, srcmem);
24229 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24231 destmem = offset_address (destmem, modesize, 1);
24232 if (issetmem)
24233 emit_move_insn (destmem, mode_value);
24234 else
24236 srcmem = offset_address (srcmem, modesize, 1);
24237 emit_move_insn (destmem, srcmem);
24241 /* Align destination. */
24242 if (desired_align > 1 && desired_align > align)
24244 rtx saveddest = *destptr;
24246 gcc_assert (desired_align <= size);
24247 /* Align destptr up, place it to new register. */
24248 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24249 GEN_INT (prolog_size),
24250 NULL_RTX, 1, OPTAB_DIRECT);
24251 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
24252 REG_POINTER (*destptr) = 1;
24253 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24254 GEN_INT (-desired_align),
24255 *destptr, 1, OPTAB_DIRECT);
24256 /* See how many bytes we skipped. */
24257 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24258 *destptr,
24259 saveddest, 1, OPTAB_DIRECT);
24260 /* Adjust srcptr and count. */
24261 if (!issetmem)
24262 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
24263 saveddest, *srcptr, 1, OPTAB_DIRECT);
24264 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24265 saveddest, *count, 1, OPTAB_DIRECT);
24266 /* We copied at most size + prolog_size. */
24267 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24268 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24269 else
24270 *min_size = 0;
/* Our loops always round down the block size, but for dispatch to a
   library call we need the precise value.  */
24274 if (dynamic_check)
24275 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24276 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24278 else
24280 gcc_assert (prolog_size == 0);
24281 /* Decrease count, so we won't end up copying last word twice. */
24282 if (!CONST_INT_P (*count))
24283 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24284 constm1_rtx, *count, 1, OPTAB_DIRECT);
24285 else
24286 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24287 if (*min_size)
24288 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
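/* Worked example of the destination-alignment step above (an illustration,
   not literal output): assume DESIRED_ALIGN == 16, a destination pointer
   ending in ...03 and PROLOG_SIZE == 16 (one misaligned 16-byte chunk
   already copied).  Rounding destptr + 16 down to a multiple of 16 advances
   the pointer by 13 bytes; SRCPTR is then advanced by the same 13 bytes and
   COUNT is reduced by 13.  The three bytes that the aligned main loop will
   copy again were already covered by the misaligned prologue chunk, which
   is harmless for memcpy and memset.  */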
24293 /* This function is like the previous one, except here we know how many bytes
24294 need to be copied. That allows us to update alignment not only of DST, which
24295 is returned, but also of SRC, which is passed as a pointer for that
24296 reason. */
24297 static rtx
24298 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24299 rtx srcreg, rtx value, rtx vec_value,
24300 int desired_align, int align_bytes,
24301 bool issetmem)
24303 rtx src = NULL;
24304 rtx orig_dst = dst;
24305 rtx orig_src = NULL;
24306 int piece_size = 1;
24307 int copied_bytes = 0;
24309 if (!issetmem)
24311 gcc_assert (srcp != NULL);
24312 src = *srcp;
24313 orig_src = src;
24316 for (piece_size = 1;
24317 piece_size <= desired_align && copied_bytes < align_bytes;
24318 piece_size <<= 1)
24320 if (align_bytes & piece_size)
24322 if (issetmem)
24324 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24325 dst = emit_memset (dst, destreg, vec_value, piece_size);
24326 else
24327 dst = emit_memset (dst, destreg, value, piece_size);
24329 else
24330 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24331 copied_bytes += piece_size;
24334 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24335 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24336 if (MEM_SIZE_KNOWN_P (orig_dst))
24337 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24339 if (!issetmem)
24341 int src_align_bytes = get_mem_align_offset (src, desired_align
24342 * BITS_PER_UNIT);
24343 if (src_align_bytes >= 0)
24344 src_align_bytes = desired_align - src_align_bytes;
24345 if (src_align_bytes >= 0)
24347 unsigned int src_align;
24348 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24350 if ((src_align_bytes & (src_align - 1))
24351 == (align_bytes & (src_align - 1)))
24352 break;
24354 if (src_align > (unsigned int) desired_align)
24355 src_align = desired_align;
24356 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24357 set_mem_align (src, src_align * BITS_PER_UNIT);
24359 if (MEM_SIZE_KNOWN_P (orig_src))
24360 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24361 *srcp = src;
24364 return dst;
24367 /* Return true if ALG can be used in current context.
24368 Assume we expand memset if MEMSET is true. */
24369 static bool
24370 alg_usable_p (enum stringop_alg alg, bool memset)
24372 if (alg == no_stringop)
24373 return false;
24374 if (alg == vector_loop)
24375 return TARGET_SSE || TARGET_AVX;
24376 /* Algorithms using the rep prefix want at least edi and ecx;
24377 additionally, memset wants eax and memcpy wants esi. Don't
24378 consider such algorithms if the user has appropriated those
24379 registers for their own purposes. */
24380 if (alg == rep_prefix_1_byte
24381 || alg == rep_prefix_4_byte
24382 || alg == rep_prefix_8_byte)
24383 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24384 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24385 return true;
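/* Usage note (an assumed but typical scenario): compiling with -ffixed-ecx
   marks %ecx as fixed, so every rep-prefix based algorithm is rejected here
   because rep needs the count in %ecx, and decide_alg below falls back to a
   loop or to a library call instead.  */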
24388 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
24389 static enum stringop_alg
24390 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24391 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24392 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24394 const struct stringop_algs * algs;
24395 bool optimize_for_speed;
24396 int max = 0;
24397 const struct processor_costs *cost;
24398 int i;
24399 bool any_alg_usable_p = false;
24401 *noalign = false;
24402 *dynamic_check = -1;
24404 /* Even if the string operation call is cold, we still might spend a lot
24405 of time processing large blocks. */
24406 if (optimize_function_for_size_p (cfun)
24407 || (optimize_insn_for_size_p ()
24408 && (max_size < 256
24409 || (expected_size != -1 && expected_size < 256))))
24410 optimize_for_speed = false;
24411 else
24412 optimize_for_speed = true;
24414 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24415 if (memset)
24416 algs = &cost->memset[TARGET_64BIT != 0];
24417 else
24418 algs = &cost->memcpy[TARGET_64BIT != 0];
24420 /* See maximal size for user defined algorithm. */
24421 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24423 enum stringop_alg candidate = algs->size[i].alg;
24424 bool usable = alg_usable_p (candidate, memset);
24425 any_alg_usable_p |= usable;
24427 if (candidate != libcall && candidate && usable)
24428 max = algs->size[i].max;
/* If the expected size is not known but the max size is small enough
   so that the inline version is a win, set the expected size into
   the range.  */
24434 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24435 && expected_size == -1)
24436 expected_size = min_size / 2 + max_size / 2;
/* If the user specified the algorithm, honor it if possible.  */
24439 if (ix86_stringop_alg != no_stringop
24440 && alg_usable_p (ix86_stringop_alg, memset))
24441 return ix86_stringop_alg;
24442 /* rep; movq or rep; movl is the smallest variant. */
24443 else if (!optimize_for_speed)
24445 *noalign = true;
24446 if (!count || (count & 3) || (memset && !zero_memset))
24447 return alg_usable_p (rep_prefix_1_byte, memset)
24448 ? rep_prefix_1_byte : loop_1_byte;
24449 else
24450 return alg_usable_p (rep_prefix_4_byte, memset)
24451 ? rep_prefix_4_byte : loop;
/* Very tiny blocks are best handled via the loop; REP is expensive to
   set up.  */
24455 else if (expected_size != -1 && expected_size < 4)
24456 return loop_1_byte;
24457 else if (expected_size != -1)
24459 enum stringop_alg alg = libcall;
24460 bool alg_noalign = false;
24461 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24463 /* We get here if the algorithms that were not libcall-based
24464 were rep-prefix based and we are unable to use rep prefixes
24465 based on global register usage. Break out of the loop and
24466 use the heuristic below. */
24467 if (algs->size[i].max == 0)
24468 break;
24469 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24471 enum stringop_alg candidate = algs->size[i].alg;
24473 if (candidate != libcall && alg_usable_p (candidate, memset))
24475 alg = candidate;
24476 alg_noalign = algs->size[i].noalign;
24478 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24479 last non-libcall inline algorithm. */
24480 if (TARGET_INLINE_ALL_STRINGOPS)
24482 /* When the current size is best to be copied by a libcall,
24483 but we are still forced to inline, run the heuristic below
24484 that will pick code for medium sized blocks. */
24485 if (alg != libcall)
24487 *noalign = alg_noalign;
24488 return alg;
24490 else if (!any_alg_usable_p)
24491 break;
24493 else if (alg_usable_p (candidate, memset))
24495 *noalign = algs->size[i].noalign;
24496 return candidate;
/* When asked to inline the call anyway, try to pick a meaningful choice.
   We look for the maximal size of a block that is faster to copy by hand
   and take blocks of at most that size, guessing that the average size
   will be roughly half of the block.
24506 If this turns out to be bad, we might simply specify the preferred
24507 choice in ix86_costs. */
24508 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24509 && (algs->unknown_size == libcall
24510 || !alg_usable_p (algs->unknown_size, memset)))
24512 enum stringop_alg alg;
24514 /* If there aren't any usable algorithms, then recursing on
24515 smaller sizes isn't going to find anything. Just return the
24516 simple byte-at-a-time copy loop. */
24517 if (!any_alg_usable_p)
24519 /* Pick something reasonable. */
24520 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24521 *dynamic_check = 128;
24522 return loop_1_byte;
24524 if (max <= 0)
24525 max = 4096;
24526 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24527 zero_memset, dynamic_check, noalign);
24528 gcc_assert (*dynamic_check == -1);
24529 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24530 *dynamic_check = max;
24531 else
24532 gcc_assert (alg != libcall);
24533 return alg;
24535 return (alg_usable_p (algs->unknown_size, memset)
24536 ? algs->unknown_size : libcall);
24539 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24540 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24541 static int
24542 decide_alignment (int align,
24543 enum stringop_alg alg,
24544 int expected_size,
24545 machine_mode move_mode)
24547 int desired_align = 0;
24549 gcc_assert (alg != no_stringop);
24551 if (alg == libcall)
24552 return 0;
24553 if (move_mode == VOIDmode)
24554 return 0;
24556 desired_align = GET_MODE_SIZE (move_mode);
24557 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
24558 copying a whole cacheline at once. */
24559 if (TARGET_PENTIUMPRO
24560 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24561 desired_align = 8;
24563 if (optimize_size)
24564 desired_align = 1;
24565 if (desired_align < align)
24566 desired_align = align;
24567 if (expected_size != -1 && expected_size < 4)
24568 desired_align = align;
24570 return desired_align;
24574 /* Helper function for memcpy. For QImode value 0xXY produce
24575 0xXYXYXYXY of the width specified by MODE. This is essentially
24576 a * 0x10101010, but we can do slightly better than
24577 synth_mult by unwinding the sequence by hand on CPUs with
24578 slow multiply. */
24579 static rtx
24580 promote_duplicated_reg (machine_mode mode, rtx val)
24582 machine_mode valmode = GET_MODE (val);
24583 rtx tmp;
24584 int nops = mode == DImode ? 3 : 2;
24586 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24587 if (val == const0_rtx)
24588 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24589 if (CONST_INT_P (val))
24591 HOST_WIDE_INT v = INTVAL (val) & 255;
24593 v |= v << 8;
24594 v |= v << 16;
24595 if (mode == DImode)
24596 v |= (v << 16) << 16;
24597 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24600 if (valmode == VOIDmode)
24601 valmode = QImode;
24602 if (valmode != QImode)
24603 val = gen_lowpart (QImode, val);
24604 if (mode == QImode)
24605 return val;
24606 if (!TARGET_PARTIAL_REG_STALL)
24607 nops--;
24608 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24609 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24610 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24611 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24613 rtx reg = convert_modes (mode, QImode, val, true);
24614 tmp = promote_duplicated_reg (mode, const1_rtx);
24615 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24616 OPTAB_DIRECT);
24618 else
24620 rtx reg = convert_modes (mode, QImode, val, true);
24622 if (!TARGET_PARTIAL_REG_STALL)
24623 if (mode == SImode)
24624 emit_insn (gen_movsi_insv_1 (reg, reg));
24625 else
24626 emit_insn (gen_movdi_insv_1 (reg, reg));
24627 else
24629 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24630 NULL, 1, OPTAB_DIRECT);
24631 reg =
24632 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24634 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24635 NULL, 1, OPTAB_DIRECT);
24636 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24637 if (mode == SImode)
24638 return reg;
24639 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24640 NULL, 1, OPTAB_DIRECT);
24641 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24642 return reg;
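/* Illustration only (not part of GCC): for a constant VAL the code above
   broadcasts a byte into every byte of the word with shifts and ORs.  A
   standalone C sketch of that computation, assuming <stdint.h>:  */

static uint64_t
sketch_broadcast_byte (uint8_t b)
{
  uint64_t v = b;

  v |= v << 8;    /* 0x00XY             -> 0xXYXY             */
  v |= v << 16;   /* 0xXYXY             -> 0xXYXYXYXY         */
  v |= v << 32;   /* 0xXYXYXYXY         -> 0xXYXYXYXYXYXYXYXY */
  return v;
}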
24646 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
24647 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
24648 alignment from ALIGN to DESIRED_ALIGN. */
24649 static rtx
24650 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24651 int align)
24653 rtx promoted_val;
24655 if (TARGET_64BIT
24656 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24657 promoted_val = promote_duplicated_reg (DImode, val);
24658 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24659 promoted_val = promote_duplicated_reg (SImode, val);
24660 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24661 promoted_val = promote_duplicated_reg (HImode, val);
24662 else
24663 promoted_val = val;
24665 return promoted_val;
24668 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24669 operations when profitable. The code depends upon architecture, block size
24670 and alignment, but always has one of the following overall structures:
24672 Aligned move sequence:
24674 1) Prologue guard: Conditional that jumps up to epilogues for small
24675 blocks that can be handled by epilogue alone. This is faster
24676 but also needed for correctness, since the prologue assumes the block
24677 is larger than the desired alignment.
24679 Optional dynamic check for size and libcall for large
24680 blocks is emitted here too, with -minline-stringops-dynamically.
24682 2) Prologue: copy first few bytes in order to get destination
24683 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24684 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24685 copied. We emit either a jump tree on power of two sized
24686 blocks, or a byte loop.
24688 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24689 with specified algorithm.
24691 4) Epilogue: code copying tail of the block that is too small to be
24692 handled by main body (or up to size guarded by prologue guard).
24694 Misaligned move sequence
24696 1) Misaligned move prologue/epilogue containing:
24697 a) Prologue handling small memory blocks and jumping to done_label
24698 (skipped if blocks are known to be large enough)
24699 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24700 needed by single possibly misaligned move
24701 (skipped if alignment is not needed)
24702 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24704 2) Zero size guard dispatching to done_label, if needed
24706 3) dispatch to library call, if needed,
24708 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24709 with the specified algorithm. (A portable C sketch of the aligned sequence follows this function.) */
24710 bool
24711 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24712 rtx align_exp, rtx expected_align_exp,
24713 rtx expected_size_exp, rtx min_size_exp,
24714 rtx max_size_exp, rtx probable_max_size_exp,
24715 bool issetmem)
24717 rtx destreg;
24718 rtx srcreg = NULL;
24719 rtx_code_label *label = NULL;
24720 rtx tmp;
24721 rtx_code_label *jump_around_label = NULL;
24722 HOST_WIDE_INT align = 1;
24723 unsigned HOST_WIDE_INT count = 0;
24724 HOST_WIDE_INT expected_size = -1;
24725 int size_needed = 0, epilogue_size_needed;
24726 int desired_align = 0, align_bytes = 0;
24727 enum stringop_alg alg;
24728 rtx promoted_val = NULL;
24729 rtx vec_promoted_val = NULL;
24730 bool force_loopy_epilogue = false;
24731 int dynamic_check;
24732 bool need_zero_guard = false;
24733 bool noalign;
24734 machine_mode move_mode = VOIDmode;
24735 int unroll_factor = 1;
24736 /* TODO: Once value ranges are available, fill in proper data. */
24737 unsigned HOST_WIDE_INT min_size = 0;
24738 unsigned HOST_WIDE_INT max_size = -1;
24739 unsigned HOST_WIDE_INT probable_max_size = -1;
24740 bool misaligned_prologue_used = false;
24742 if (CONST_INT_P (align_exp))
24743 align = INTVAL (align_exp);
24744 /* i386 can do misaligned access at a reasonable increase in cost. */
24745 if (CONST_INT_P (expected_align_exp)
24746 && INTVAL (expected_align_exp) > align)
24747 align = INTVAL (expected_align_exp);
24748 /* ALIGN is the minimum of destination and source alignment, but we care here
24749 just about destination alignment. */
24750 else if (!issetmem
24751 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24752 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24754 if (CONST_INT_P (count_exp))
24756 min_size = max_size = probable_max_size = count = expected_size
24757 = INTVAL (count_exp);
24758 /* When COUNT is 0, there is nothing to do. */
24759 if (!count)
24760 return true;
24762 else
24764 if (min_size_exp)
24765 min_size = INTVAL (min_size_exp);
24766 if (max_size_exp)
24767 max_size = INTVAL (max_size_exp);
24768 if (probable_max_size_exp)
24769 probable_max_size = INTVAL (probable_max_size_exp);
24770 if (CONST_INT_P (expected_size_exp))
24771 expected_size = INTVAL (expected_size_exp);
24774 /* Make sure we don't need to care about overflow later on. */
24775 if (count > (HOST_WIDE_INT_1U << 30))
24776 return false;
24778 /* Step 0: Decide on preferred algorithm, desired alignment and
24779 size of chunks to be copied by main loop. */
24780 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24781 issetmem,
24782 issetmem && val_exp == const0_rtx,
24783 &dynamic_check, &noalign);
24784 if (alg == libcall)
24785 return false;
24786 gcc_assert (alg != no_stringop);
24788 /* For now the vector version of memset is generated only for memory zeroing, as
24789 creating the promoted vector value is very cheap in this case. */
24790 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24791 alg = unrolled_loop;
24793 if (!count)
24794 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24795 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24796 if (!issetmem)
24797 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24799 unroll_factor = 1;
24800 move_mode = word_mode;
24801 switch (alg)
24803 case libcall:
24804 case no_stringop:
24805 case last_alg:
24806 gcc_unreachable ();
24807 case loop_1_byte:
24808 need_zero_guard = true;
24809 move_mode = QImode;
24810 break;
24811 case loop:
24812 need_zero_guard = true;
24813 break;
24814 case unrolled_loop:
24815 need_zero_guard = true;
24816 unroll_factor = (TARGET_64BIT ? 4 : 2);
24817 break;
24818 case vector_loop:
24819 need_zero_guard = true;
24820 unroll_factor = 4;
24821 /* Find the widest supported mode. */
24822 move_mode = word_mode;
24823 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24824 != CODE_FOR_nothing)
24825 move_mode = GET_MODE_WIDER_MODE (move_mode);
24827 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24828 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24829 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24831 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24832 move_mode = mode_for_vector (word_mode, nunits);
24833 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24834 move_mode = word_mode;
24836 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24837 break;
24838 case rep_prefix_8_byte:
24839 move_mode = DImode;
24840 break;
24841 case rep_prefix_4_byte:
24842 move_mode = SImode;
24843 break;
24844 case rep_prefix_1_byte:
24845 move_mode = QImode;
24846 break;
24848 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24849 epilogue_size_needed = size_needed;
24851 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24852 if (!TARGET_ALIGN_STRINGOPS || noalign)
24853 align = desired_align;
24855 /* Step 1: Prologue guard. */
24857 /* Alignment code needs count to be in register. */
24858 if (CONST_INT_P (count_exp) && desired_align > align)
24860 if (INTVAL (count_exp) > desired_align
24861 && INTVAL (count_exp) > size_needed)
24863 align_bytes
24864 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24865 if (align_bytes <= 0)
24866 align_bytes = 0;
24867 else
24868 align_bytes = desired_align - align_bytes;
24870 if (align_bytes == 0)
24871 count_exp = force_reg (counter_mode (count_exp), count_exp);
24873 gcc_assert (desired_align >= 1 && align >= 1);
24875 /* Misaligned move sequences handle both prologue and epilogue at once.
24876 Default code generation results in smaller code for large alignments
24877 and also avoids redundant work when sizes are known precisely. */
24878 misaligned_prologue_used
24879 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24880 && MAX (desired_align, epilogue_size_needed) <= 32
24881 && desired_align <= epilogue_size_needed
24882 && ((desired_align > align && !align_bytes)
24883 || (!count && epilogue_size_needed > 1)));
24885 /* Do the cheap promotion to allow better CSE across the
24886 main loop and epilogue (i.e., one load of the big constant in
24887 front of all the code).
24888 For now the misaligned move sequences do not have a fast path
24889 without broadcasting. */
24890 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24892 if (alg == vector_loop)
24894 gcc_assert (val_exp == const0_rtx);
24895 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24896 promoted_val = promote_duplicated_reg_to_size (val_exp,
24897 GET_MODE_SIZE (word_mode),
24898 desired_align, align);
24900 else
24902 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24903 desired_align, align);
24906 /* Misaligned move sequences handle both prologues and epilogues at once.
24907 Default code generation results in smaller code for large alignments and
24908 also avoids redundant work when sizes are known precisely. */
24909 if (misaligned_prologue_used)
24911 /* Misaligned move prologue handles small blocks by itself. */
24912 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24913 (dst, src, &destreg, &srcreg,
24914 move_mode, promoted_val, vec_promoted_val,
24915 &count_exp,
24916 &jump_around_label,
24917 desired_align < align
24918 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24919 desired_align, align, &min_size, dynamic_check, issetmem);
24920 if (!issetmem)
24921 src = change_address (src, BLKmode, srcreg);
24922 dst = change_address (dst, BLKmode, destreg);
24923 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24924 epilogue_size_needed = 0;
24925 if (need_zero_guard && !min_size)
24927 /* It is possible that we copied enough so the main loop will not
24928 execute. */
24929 gcc_assert (size_needed > 1);
24930 if (jump_around_label == NULL_RTX)
24931 jump_around_label = gen_label_rtx ();
24932 emit_cmp_and_jump_insns (count_exp,
24933 GEN_INT (size_needed),
24934 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24935 if (expected_size == -1
24936 || expected_size < (desired_align - align) / 2 + size_needed)
24937 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24938 else
24939 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24942 /* Ensure that alignment prologue won't copy past end of block. */
24943 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24945 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24946 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24947 Make sure it is power of 2. */
24948 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24950 /* To improve performance for small blocks, we jump around the VAL
24951 promoting code. This means that if the promoted VAL is not constant,
24952 we might not use it in the epilogue and have to use the byte
24953 loop variant. */
24954 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24955 force_loopy_epilogue = true;
24956 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24957 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24959 /* If main algorithm works on QImode, no epilogue is needed.
24960 For small sizes just don't align anything. */
24961 if (size_needed == 1)
24962 desired_align = align;
24963 else
24964 goto epilogue;
24966 else if (!count
24967 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24969 label = gen_label_rtx ();
24970 emit_cmp_and_jump_insns (count_exp,
24971 GEN_INT (epilogue_size_needed),
24972 LTU, 0, counter_mode (count_exp), 1, label);
24973 if (expected_size == -1 || expected_size < epilogue_size_needed)
24974 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24975 else
24976 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24980 /* Emit code to decide at runtime whether a library call or inline code
24981 should be used. */
24982 if (dynamic_check != -1)
24984 if (!issetmem && CONST_INT_P (count_exp))
24986 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24988 emit_block_move_via_libcall (dst, src, count_exp, false);
24989 count_exp = const0_rtx;
24990 goto epilogue;
24993 else
24995 rtx_code_label *hot_label = gen_label_rtx ();
24996 if (jump_around_label == NULL_RTX)
24997 jump_around_label = gen_label_rtx ();
24998 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24999 LEU, 0, counter_mode (count_exp),
25000 1, hot_label);
25001 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25002 if (issetmem)
25003 set_storage_via_libcall (dst, count_exp, val_exp, false);
25004 else
25005 emit_block_move_via_libcall (dst, src, count_exp, false);
25006 emit_jump (jump_around_label);
25007 emit_label (hot_label);
25011 /* Step 2: Alignment prologue. */
25012 /* Do the expensive promotion once we branched off the small blocks. */
25013 if (issetmem && !promoted_val)
25014 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25015 desired_align, align);
25017 if (desired_align > align && !misaligned_prologue_used)
25019 if (align_bytes == 0)
25021 /* Except for the first move in the prologue, we no longer know
25022 the constant offset in aliasing info. It doesn't seem worth
25023 the pain to maintain it for the first move, so throw away
25024 the info early. */
25025 dst = change_address (dst, BLKmode, destreg);
25026 if (!issetmem)
25027 src = change_address (src, BLKmode, srcreg);
25028 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25029 promoted_val, vec_promoted_val,
25030 count_exp, align, desired_align,
25031 issetmem);
25032 /* At most desired_align - align bytes are copied. */
25033 if (min_size < (unsigned)(desired_align - align))
25034 min_size = 0;
25035 else
25036 min_size -= desired_align - align;
25038 else
25040 /* If we know how many bytes need to be stored before dst is
25041 sufficiently aligned, maintain aliasing info accurately. */
25042 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25043 srcreg,
25044 promoted_val,
25045 vec_promoted_val,
25046 desired_align,
25047 align_bytes,
25048 issetmem);
25050 count_exp = plus_constant (counter_mode (count_exp),
25051 count_exp, -align_bytes);
25052 count -= align_bytes;
25053 min_size -= align_bytes;
25054 max_size -= align_bytes;
25056 if (need_zero_guard
25057 && !min_size
25058 && (count < (unsigned HOST_WIDE_INT) size_needed
25059 || (align_bytes == 0
25060 && count < ((unsigned HOST_WIDE_INT) size_needed
25061 + desired_align - align))))
25063 /* It is possible that we copied enough so the main loop will not
25064 execute. */
25065 gcc_assert (size_needed > 1);
25066 if (label == NULL_RTX)
25067 label = gen_label_rtx ();
25068 emit_cmp_and_jump_insns (count_exp,
25069 GEN_INT (size_needed),
25070 LTU, 0, counter_mode (count_exp), 1, label);
25071 if (expected_size == -1
25072 || expected_size < (desired_align - align) / 2 + size_needed)
25073 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25074 else
25075 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25078 if (label && size_needed == 1)
25080 emit_label (label);
25081 LABEL_NUSES (label) = 1;
25082 label = NULL;
25083 epilogue_size_needed = 1;
25084 if (issetmem)
25085 promoted_val = val_exp;
25087 else if (label == NULL_RTX && !misaligned_prologue_used)
25088 epilogue_size_needed = size_needed;
25090 /* Step 3: Main loop. */
25092 switch (alg)
25094 case libcall:
25095 case no_stringop:
25096 case last_alg:
25097 gcc_unreachable ();
25098 case loop_1_byte:
25099 case loop:
25100 case unrolled_loop:
25101 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25102 count_exp, move_mode, unroll_factor,
25103 expected_size, issetmem);
25104 break;
25105 case vector_loop:
25106 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25107 vec_promoted_val, count_exp, move_mode,
25108 unroll_factor, expected_size, issetmem);
25109 break;
25110 case rep_prefix_8_byte:
25111 case rep_prefix_4_byte:
25112 case rep_prefix_1_byte:
25113 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25114 val_exp, count_exp, move_mode, issetmem);
25115 break;
25117 /* Properly adjust the offset of src and dest memory for aliasing. */
25118 if (CONST_INT_P (count_exp))
25120 if (!issetmem)
25121 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25122 (count / size_needed) * size_needed);
25123 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25124 (count / size_needed) * size_needed);
25126 else
25128 if (!issetmem)
25129 src = change_address (src, BLKmode, srcreg);
25130 dst = change_address (dst, BLKmode, destreg);
25133 /* Step 4: Epilogue to copy the remaining bytes. */
25134 epilogue:
25135 if (label)
25137 /* When the main loop is done, COUNT_EXP might hold original count,
25138 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
25139 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
25140 bytes. Compensate if needed. */
25142 if (size_needed < epilogue_size_needed)
25144 tmp =
25145 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25146 GEN_INT (size_needed - 1), count_exp, 1,
25147 OPTAB_DIRECT);
25148 if (tmp != count_exp)
25149 emit_move_insn (count_exp, tmp);
25151 emit_label (label);
25152 LABEL_NUSES (label) = 1;
25155 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25157 if (force_loopy_epilogue)
25158 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25159 epilogue_size_needed);
25160 else
25162 if (issetmem)
25163 expand_setmem_epilogue (dst, destreg, promoted_val,
25164 vec_promoted_val, count_exp,
25165 epilogue_size_needed);
25166 else
25167 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25168 epilogue_size_needed);
25171 if (jump_around_label)
25172 emit_label (jump_around_label);
25173 return true;
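/* Illustration only (not part of GCC): a portable C sketch of the aligned
   move sequence described in the comment before ix86_expand_set_or_movmem
   (prologue guard, alignment prologue, word-sized main body, byte
   epilogue).  It assumes <stddef.h>, <stdint.h> and <string.h>; the names
   are hypothetical.  */

static void *
sketch_aligned_memcpy (void *dst, const void *src, size_t count)
{
  unsigned char *d = (unsigned char *) dst;
  const unsigned char *s = (const unsigned char *) src;

  /* 1) Prologue guard: small blocks are handled by the epilogue alone.  */
  if (count >= sizeof (uint64_t))
    {
      /* 2) Alignment prologue: copy bytes until DST is word aligned.  */
      while ((uintptr_t) d % sizeof (uint64_t) != 0)
        {
          *d++ = *s++;
          count--;
        }

      /* 3) Main body: copy one word-sized chunk per iteration.  */
      while (count >= sizeof (uint64_t))
        {
          memcpy (d, s, sizeof (uint64_t));   /* one word-sized move */
          d += sizeof (uint64_t);
          s += sizeof (uint64_t);
          count -= sizeof (uint64_t);
        }
    }

  /* 4) Epilogue: copy the tail that is too small for the main body.  */
  while (count--)
    *d++ = *s++;

  return dst;
}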
25177 /* Expand the appropriate insns for doing strlen if not just doing
25178 repnz; scasb
25180 out = result, initialized with the start address
25181 align_rtx = alignment of the address.
25182 scratch = scratch register, initialized with the start address when
25183 not aligned, otherwise undefined
25185 This is just the body. It needs the initializations mentioned above and
25186 some address computing at the end. These things are done in i386.md. */
25188 static void
25189 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25191 int align;
25192 rtx tmp;
25193 rtx_code_label *align_2_label = NULL;
25194 rtx_code_label *align_3_label = NULL;
25195 rtx_code_label *align_4_label = gen_label_rtx ();
25196 rtx_code_label *end_0_label = gen_label_rtx ();
25197 rtx mem;
25198 rtx tmpreg = gen_reg_rtx (SImode);
25199 rtx scratch = gen_reg_rtx (SImode);
25200 rtx cmp;
25202 align = 0;
25203 if (CONST_INT_P (align_rtx))
25204 align = INTVAL (align_rtx);
25206 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25208 /* Is there a known alignment and is it less than 4? */
25209 if (align < 4)
25211 rtx scratch1 = gen_reg_rtx (Pmode);
25212 emit_move_insn (scratch1, out);
25213 /* Is there a known alignment and is it not 2? */
25214 if (align != 2)
25216 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25217 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25219 /* Leave just the 3 lower bits. */
25220 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25221 NULL_RTX, 0, OPTAB_WIDEN);
25223 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25224 Pmode, 1, align_4_label);
25225 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25226 Pmode, 1, align_2_label);
25227 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25228 Pmode, 1, align_3_label);
25230 else
25232 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25233 check whether it is aligned to a 4-byte boundary. */
25235 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25236 NULL_RTX, 0, OPTAB_WIDEN);
25238 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25239 Pmode, 1, align_4_label);
25242 mem = change_address (src, QImode, out);
25244 /* Now compare the bytes. */
25246 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
25247 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25248 QImode, 1, end_0_label);
25250 /* Increment the address. */
25251 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25253 /* Not needed with an alignment of 2 */
25254 if (align != 2)
25256 emit_label (align_2_label);
25258 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25259 end_0_label);
25261 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25263 emit_label (align_3_label);
25266 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25267 end_0_label);
25269 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25272 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
25273 align this loop: it only enlarges the code and does not help to
25274 speed it up. */
25275 emit_label (align_4_label);
25277 mem = change_address (src, SImode, out);
25278 emit_move_insn (scratch, mem);
25279 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25281 /* This formula yields a nonzero result iff one of the bytes is zero.
25282 This saves three branches inside the loop and many cycles. */
25284 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25285 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25286 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25287 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25288 gen_int_mode (0x80808080, SImode)));
25289 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25290 align_4_label);
25292 if (TARGET_CMOVE)
25294 rtx reg = gen_reg_rtx (SImode);
25295 rtx reg2 = gen_reg_rtx (Pmode);
25296 emit_move_insn (reg, tmpreg);
25297 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25299 /* If zero is not in the first two bytes, move two bytes forward. */
25300 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25301 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25302 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25303 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
25304 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25305 reg,
25306 tmpreg)));
25307 /* Emit lea manually to avoid clobbering of flags. */
25308 emit_insn (gen_rtx_SET (SImode, reg2,
25309 gen_rtx_PLUS (Pmode, out, const2_rtx)));
25311 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25312 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25313 emit_insn (gen_rtx_SET (VOIDmode, out,
25314 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25315 reg2,
25316 out)));
25318 else
25320 rtx_code_label *end_2_label = gen_label_rtx ();
25321 /* Is zero in the first two bytes? */
25323 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25324 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25325 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25326 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25327 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25328 pc_rtx);
25329 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25330 JUMP_LABEL (tmp) = end_2_label;
25332 /* Not in the first two. Move two bytes forward. */
25333 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25334 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25336 emit_label (end_2_label);
25340 /* Avoid branch in fixing the byte. */
25341 tmpreg = gen_lowpart (QImode, tmpreg);
25342 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25343 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25344 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25345 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25347 emit_label (end_0_label);
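/* Illustration only (not part of GCC): the zero-byte test emitted in the
   4-byte loop above is the classic (x - 0x01010101) & ~x & 0x80808080
   trick.  A standalone C sketch, assuming <stdint.h>:  */

static int
sketch_word_has_zero_byte (uint32_t x)
{
  /* With no incoming borrow, subtracting 1 sets bit 7 of a byte only when
     that byte was 0 or at least 0x81; masking with ~x keeps only bytes
     whose high bit was clear, i.e. exactly the zero bytes.  A borrow from
     a lower byte can only happen when that lower byte is itself zero.  */
  return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
}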
25350 /* Expand strlen. */
25352 bool
25353 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25355 rtx addr, scratch1, scratch2, scratch3, scratch4;
25357 /* The generic case of the strlen expander is long. Avoid expanding it
25358 unless TARGET_INLINE_ALL_STRINGOPS. */
25360 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25361 && !TARGET_INLINE_ALL_STRINGOPS
25362 && !optimize_insn_for_size_p ()
25363 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25364 return false;
25366 addr = force_reg (Pmode, XEXP (src, 0));
25367 scratch1 = gen_reg_rtx (Pmode);
25369 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25370 && !optimize_insn_for_size_p ())
25372 /* Well it seems that some optimizer does not combine a call like
25373 foo(strlen(bar), strlen(bar));
25374 when the move and the subtraction are done here. It does calculate
25375 the length just once when these instructions are done inside of
25376 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
25377 often used and I use one fewer register for the lifetime of
25378 output_strlen_unroll() this is better. */
25380 emit_move_insn (out, addr);
25382 ix86_expand_strlensi_unroll_1 (out, src, align);
25384 /* strlensi_unroll_1 returns the address of the zero at the end of
25385 the string, like memchr(), so compute the length by subtracting
25386 the start address. */
25387 emit_insn (ix86_gen_sub3 (out, out, addr));
25389 else
25391 rtx unspec;
25393 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25394 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25395 return false;
25397 scratch2 = gen_reg_rtx (Pmode);
25398 scratch3 = gen_reg_rtx (Pmode);
25399 scratch4 = force_reg (Pmode, constm1_rtx);
25401 emit_move_insn (scratch3, addr);
25402 eoschar = force_reg (QImode, eoschar);
25404 src = replace_equiv_address_nv (src, scratch3);
25406 /* If .md starts supporting :P, this can be done in .md. */
25407 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25408 scratch4), UNSPEC_SCAS);
25409 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25410 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25411 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25413 return true;
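/* Illustration only (not part of GCC): the arithmetic behind the scasb
   path above.  The count register starts at -1 (SCRATCH4) and repnz scasb
   decrements it once per byte scanned, including the terminating zero, so
   afterwards count = -1 - (len + 1) = -len - 2, and therefore
   len = ~count - 1, which is what the one_cmpl and add insns compute.  */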
25416 /* For a given symbol (function), construct code to compute the address of its PLT
25417 entry in the large x86-64 PIC model. */
25418 static rtx
25419 construct_plt_address (rtx symbol)
25421 rtx tmp, unspec;
25423 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25424 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25425 gcc_assert (Pmode == DImode);
25427 tmp = gen_reg_rtx (Pmode);
25428 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25430 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25431 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25432 return tmp;
25436 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25437 rtx callarg2,
25438 rtx pop, bool sibcall)
25440 rtx vec[3];
25441 rtx use = NULL, call;
25442 unsigned int vec_len = 0;
25444 if (pop == const0_rtx)
25445 pop = NULL;
25446 gcc_assert (!TARGET_64BIT || !pop);
25448 if (TARGET_MACHO && !TARGET_64BIT)
25450 #if TARGET_MACHO
25451 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25452 fnaddr = machopic_indirect_call_target (fnaddr);
25453 #endif
25455 else
25457 /* Static functions and indirect calls don't need the pic register. */
25458 if (flag_pic
25459 && (!TARGET_64BIT
25460 || (ix86_cmodel == CM_LARGE_PIC
25461 && DEFAULT_ABI != MS_ABI))
25462 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25463 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25465 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25466 if (ix86_use_pseudo_pic_reg ())
25467 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25468 pic_offset_table_rtx);
25472 /* Skip setting up RAX register for -mskip-rax-setup when there are no
25473 parameters passed in vector registers. */
25474 if (TARGET_64BIT
25475 && (INTVAL (callarg2) > 0
25476 || (INTVAL (callarg2) == 0
25477 && (TARGET_SSE || !flag_skip_rax_setup))))
25479 rtx al = gen_rtx_REG (QImode, AX_REG);
25480 emit_move_insn (al, callarg2);
25481 use_reg (&use, al);
25484 if (ix86_cmodel == CM_LARGE_PIC
25485 && !TARGET_PECOFF
25486 && MEM_P (fnaddr)
25487 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25488 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25489 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25490 else if (sibcall
25491 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25492 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25494 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25495 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25498 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25500 if (retval)
25502 /* We should add bounds as a destination register in case
25503 a pointer with bounds may be returned. */
25504 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25506 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25507 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25508 if (GET_CODE (retval) == PARALLEL)
25510 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
25511 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
25512 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
25513 retval = chkp_join_splitted_slot (retval, par);
25515 else
25517 retval = gen_rtx_PARALLEL (VOIDmode,
25518 gen_rtvec (3, retval, b0, b1));
25519 chkp_put_regs_to_expr_list (retval);
25523 call = gen_rtx_SET (VOIDmode, retval, call);
25525 vec[vec_len++] = call;
25527 if (pop)
25529 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25530 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25531 vec[vec_len++] = pop;
25534 if (TARGET_64BIT_MS_ABI
25535 && (!callarg2 || INTVAL (callarg2) != -2))
25537 int const cregs_size
25538 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25539 int i;
25541 for (i = 0; i < cregs_size; i++)
25543 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25544 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25546 clobber_reg (&use, gen_rtx_REG (mode, regno));
25550 if (vec_len > 1)
25551 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25552 call = emit_call_insn (call);
25553 if (use)
25554 CALL_INSN_FUNCTION_USAGE (call) = use;
25556 return call;
25559 /* Output the assembly for a call instruction. */
25561 const char *
25562 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25564 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25565 bool seh_nop_p = false;
25566 const char *xasm;
25568 if (SIBLING_CALL_P (insn))
25570 if (direct_p)
25571 xasm = "%!jmp\t%P0";
25572 /* SEH epilogue detection requires the indirect branch case
25573 to include REX.W. */
25574 else if (TARGET_SEH)
25575 xasm = "%!rex.W jmp %A0";
25576 else
25577 xasm = "%!jmp\t%A0";
25579 output_asm_insn (xasm, &call_op);
25580 return "";
25583 /* SEH unwinding can require an extra nop to be emitted in several
25584 circumstances. Determine if we have one of those. */
25585 if (TARGET_SEH)
25587 rtx_insn *i;
25589 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25591 /* If we get to another real insn, we don't need the nop. */
25592 if (INSN_P (i))
25593 break;
25595 /* If we get to the epilogue note, prevent a catch region from
25596 being adjacent to the standard epilogue sequence. If non-
25597 call-exceptions, we'll have done this during epilogue emission. */
25598 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25599 && !flag_non_call_exceptions
25600 && !can_throw_internal (insn))
25602 seh_nop_p = true;
25603 break;
25607 /* If we didn't find a real insn following the call, prevent the
25608 unwinder from looking into the next function. */
25609 if (i == NULL)
25610 seh_nop_p = true;
25613 if (direct_p)
25614 xasm = "%!call\t%P0";
25615 else
25616 xasm = "%!call\t%A0";
25618 output_asm_insn (xasm, &call_op);
25620 if (seh_nop_p)
25621 return "nop";
25623 return "";
25626 /* Clear stack slot assignments remembered from previous functions.
25627 This is called from INIT_EXPANDERS once before RTL is emitted for each
25628 function. */
25630 static struct machine_function *
25631 ix86_init_machine_status (void)
25633 struct machine_function *f;
25635 f = ggc_cleared_alloc<machine_function> ();
25636 f->use_fast_prologue_epilogue_nregs = -1;
25637 f->call_abi = ix86_abi;
25639 return f;
25642 /* Return a MEM corresponding to a stack slot with mode MODE.
25643 Allocate a new slot if necessary.
25645 The RTL for a function can have several slots available: N is
25646 which slot to use. */
25649 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25651 struct stack_local_entry *s;
25653 gcc_assert (n < MAX_386_STACK_LOCALS);
25655 for (s = ix86_stack_locals; s; s = s->next)
25656 if (s->mode == mode && s->n == n)
25657 return validize_mem (copy_rtx (s->rtl));
25659 s = ggc_alloc<stack_local_entry> ();
25660 s->n = n;
25661 s->mode = mode;
25662 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25664 s->next = ix86_stack_locals;
25665 ix86_stack_locals = s;
25666 return validize_mem (copy_rtx (s->rtl));
25669 static void
25670 ix86_instantiate_decls (void)
25672 struct stack_local_entry *s;
25674 for (s = ix86_stack_locals; s; s = s->next)
25675 if (s->rtl != NULL_RTX)
25676 instantiate_decl_rtl (s->rtl);
25679 /* Check whether x86 address PARTS is a pc-relative address. */
25681 static bool
25682 rip_relative_addr_p (struct ix86_address *parts)
25684 rtx base, index, disp;
25686 base = parts->base;
25687 index = parts->index;
25688 disp = parts->disp;
25690 if (disp && !base && !index)
25692 if (TARGET_64BIT)
25694 rtx symbol = disp;
25696 if (GET_CODE (disp) == CONST)
25697 symbol = XEXP (disp, 0);
25698 if (GET_CODE (symbol) == PLUS
25699 && CONST_INT_P (XEXP (symbol, 1)))
25700 symbol = XEXP (symbol, 0);
25702 if (GET_CODE (symbol) == LABEL_REF
25703 || (GET_CODE (symbol) == SYMBOL_REF
25704 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25705 || (GET_CODE (symbol) == UNSPEC
25706 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25707 || XINT (symbol, 1) == UNSPEC_PCREL
25708 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25709 return true;
25712 return false;
25715 /* Calculate the length of the memory address in the instruction encoding.
25716 Includes the addr32 prefix but does not include the one-byte modrm, opcode,
25717 or other prefixes. We never generate an addr32 prefix for the LEA insn. */
25720 memory_address_length (rtx addr, bool lea)
25722 struct ix86_address parts;
25723 rtx base, index, disp;
25724 int len;
25725 int ok;
25727 if (GET_CODE (addr) == PRE_DEC
25728 || GET_CODE (addr) == POST_INC
25729 || GET_CODE (addr) == PRE_MODIFY
25730 || GET_CODE (addr) == POST_MODIFY)
25731 return 0;
25733 ok = ix86_decompose_address (addr, &parts);
25734 gcc_assert (ok);
25736 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25738 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
25739 if (TARGET_64BIT && !lea
25740 && (SImode_address_operand (addr, VOIDmode)
25741 || (parts.base && GET_MODE (parts.base) == SImode)
25742 || (parts.index && GET_MODE (parts.index) == SImode)))
25743 len++;
25745 base = parts.base;
25746 index = parts.index;
25747 disp = parts.disp;
25749 if (base && GET_CODE (base) == SUBREG)
25750 base = SUBREG_REG (base);
25751 if (index && GET_CODE (index) == SUBREG)
25752 index = SUBREG_REG (index);
25754 gcc_assert (base == NULL_RTX || REG_P (base));
25755 gcc_assert (index == NULL_RTX || REG_P (index));
25757 /* Rule of thumb:
25758 - esp as the base always wants an index,
25759 - ebp as the base always wants a displacement,
25760 - r12 as the base always wants an index,
25761 - r13 as the base always wants a displacement. */
25763 /* Register Indirect. */
25764 if (base && !index && !disp)
25766 /* esp (for its index) and ebp (for its displacement) need
25767 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25768 code. */
25769 if (base == arg_pointer_rtx
25770 || base == frame_pointer_rtx
25771 || REGNO (base) == SP_REG
25772 || REGNO (base) == BP_REG
25773 || REGNO (base) == R12_REG
25774 || REGNO (base) == R13_REG)
25775 len++;
25778 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25779 is not disp32, but disp32(%rip), so for disp32
25780 SIB byte is needed, unless print_operand_address
25781 optimizes it into disp32(%rip) or (%rip) is implied
25782 by UNSPEC. */
25783 else if (disp && !base && !index)
25785 len += 4;
25786 if (rip_relative_addr_p (&parts))
25787 len++;
25789 else
25791 /* Find the length of the displacement constant. */
25792 if (disp)
25794 if (base && satisfies_constraint_K (disp))
25795 len += 1;
25796 else
25797 len += 4;
25799 /* ebp always wants a displacement. Similarly r13. */
25800 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25801 len++;
25803 /* An index requires the two-byte modrm form.... */
25804 if (index
25805 /* ...like esp (or r12), which always wants an index. */
25806 || base == arg_pointer_rtx
25807 || base == frame_pointer_rtx
25808 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25809 len++;
25812 return len;
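/* Illustration only (not part of GCC): a few concrete cases of the rules
   above, counting bytes beyond the opcode and the one-byte modrm:
     (%rax)          -> 0   plain register indirect
     (%rsp), (%r12)  -> 1   these bases always need a SIB byte
     (%rbp), (%r13)  -> 1   these bases always need a displacement (disp8 0)
     8(%rax)         -> 1   disp8 (satisfies constraint K)
     1024(%rax)      -> 4   disp32
     (%rax,%rbx,4)   -> 1   an index always needs a SIB byte
   plus one byte for a segment override and, for non-LEA insns, one byte
   for the addr32 prefix when a 32-bit address is used in 64-bit mode.  */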
25815 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25816 is set, expect that the insn has an 8-bit immediate alternative. */
25818 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25820 int len = 0;
25821 int i;
25822 extract_insn_cached (insn);
25823 for (i = recog_data.n_operands - 1; i >= 0; --i)
25824 if (CONSTANT_P (recog_data.operand[i]))
25826 enum attr_mode mode = get_attr_mode (insn);
25828 gcc_assert (!len);
25829 if (shortform && CONST_INT_P (recog_data.operand[i]))
25831 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25832 switch (mode)
25834 case MODE_QI:
25835 len = 1;
25836 continue;
25837 case MODE_HI:
25838 ival = trunc_int_for_mode (ival, HImode);
25839 break;
25840 case MODE_SI:
25841 ival = trunc_int_for_mode (ival, SImode);
25842 break;
25843 default:
25844 break;
25846 if (IN_RANGE (ival, -128, 127))
25848 len = 1;
25849 continue;
25852 switch (mode)
25854 case MODE_QI:
25855 len = 1;
25856 break;
25857 case MODE_HI:
25858 len = 2;
25859 break;
25860 case MODE_SI:
25861 len = 4;
25862 break;
25863 /* Immediates for DImode instructions are encoded
25864 as 32-bit sign-extended values. */
25865 case MODE_DI:
25866 len = 4;
25867 break;
25868 default:
25869 fatal_insn ("unknown insn mode", insn);
25872 return len;
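/* Illustration only (not part of GCC): with a short-form alternative,
   "addl $100, %eax" can use the sign-extended 8-bit immediate (length 1),
   while "addl $300, %eax" needs a full 32-bit immediate (length 4); as
   noted above, DImode immediates are likewise limited to 32-bit
   sign-extended values.  */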
25875 /* Compute default value for "length_address" attribute. */
25877 ix86_attr_length_address_default (rtx_insn *insn)
25879 int i;
25881 if (get_attr_type (insn) == TYPE_LEA)
25883 rtx set = PATTERN (insn), addr;
25885 if (GET_CODE (set) == PARALLEL)
25886 set = XVECEXP (set, 0, 0);
25888 gcc_assert (GET_CODE (set) == SET);
25890 addr = SET_SRC (set);
25892 return memory_address_length (addr, true);
25895 extract_insn_cached (insn);
25896 for (i = recog_data.n_operands - 1; i >= 0; --i)
25897 if (MEM_P (recog_data.operand[i]))
25899 constrain_operands_cached (insn, reload_completed);
25900 if (which_alternative != -1)
25902 const char *constraints = recog_data.constraints[i];
25903 int alt = which_alternative;
25905 while (*constraints == '=' || *constraints == '+')
25906 constraints++;
25907 while (alt-- > 0)
25908 while (*constraints++ != ',')
25910 /* Skip ignored operands. */
25911 if (*constraints == 'X')
25912 continue;
25914 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25916 return 0;
25919 /* Compute default value for "length_vex" attribute. It includes
25920 the 2- or 3-byte VEX prefix and 1 opcode byte. */
25923 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
25924 bool has_vex_w)
25926 int i;
25928 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX.W bit requires
25929 the 3-byte VEX prefix. */
25930 if (!has_0f_opcode || has_vex_w)
25931 return 3 + 1;
25933 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
25934 if (!TARGET_64BIT)
25935 return 2 + 1;
25937 extract_insn_cached (insn);
25939 for (i = recog_data.n_operands - 1; i >= 0; --i)
25940 if (REG_P (recog_data.operand[i]))
25942 /* REX.W bit uses 3 byte VEX prefix. */
25943 if (GET_MODE (recog_data.operand[i]) == DImode
25944 && GENERAL_REG_P (recog_data.operand[i]))
25945 return 3 + 1;
25947 else
25949 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25950 if (MEM_P (recog_data.operand[i])
25951 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25952 return 3 + 1;
25955 return 2 + 1;
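/* Illustration only (not part of GCC): the 2-byte VEX prefix (0xC5) has no
   X, B or W bits and implies the 0f opcode map, so it only works for
   0f-escaped opcodes with VEX.W = 0 and no extended index/base register in
   a memory operand.  For example "vaddps %xmm1, %xmm2, %xmm3" fits the
   2-byte form (2 + 1 with the opcode byte), while an instruction operating
   on a DImode general register needs REX.W and hence the 3-byte form
   (3 + 1).  */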
25958 /* Return the maximum number of instructions a cpu can issue. */
25960 static int
25961 ix86_issue_rate (void)
25963 switch (ix86_tune)
25965 case PROCESSOR_PENTIUM:
25966 case PROCESSOR_BONNELL:
25967 case PROCESSOR_SILVERMONT:
25968 case PROCESSOR_KNL:
25969 case PROCESSOR_INTEL:
25970 case PROCESSOR_K6:
25971 case PROCESSOR_BTVER2:
25972 case PROCESSOR_PENTIUM4:
25973 case PROCESSOR_NOCONA:
25974 return 2;
25976 case PROCESSOR_PENTIUMPRO:
25977 case PROCESSOR_ATHLON:
25978 case PROCESSOR_K8:
25979 case PROCESSOR_AMDFAM10:
25980 case PROCESSOR_GENERIC:
25981 case PROCESSOR_BTVER1:
25982 return 3;
25984 case PROCESSOR_BDVER1:
25985 case PROCESSOR_BDVER2:
25986 case PROCESSOR_BDVER3:
25987 case PROCESSOR_BDVER4:
25988 case PROCESSOR_CORE2:
25989 case PROCESSOR_NEHALEM:
25990 case PROCESSOR_SANDYBRIDGE:
25991 case PROCESSOR_HASWELL:
25992 return 4;
25994 default:
25995 return 1;
25999 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
26000 by DEP_INSN and nothing else set by DEP_INSN. */
26002 static bool
26003 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
26005 rtx set, set2;
26007 /* Simplify the test for uninteresting insns. */
26008 if (insn_type != TYPE_SETCC
26009 && insn_type != TYPE_ICMOV
26010 && insn_type != TYPE_FCMOV
26011 && insn_type != TYPE_IBR)
26012 return false;
26014 if ((set = single_set (dep_insn)) != 0)
26016 set = SET_DEST (set);
26017 set2 = NULL_RTX;
26019 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26020 && XVECLEN (PATTERN (dep_insn), 0) == 2
26021 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26022 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26024 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26025 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
26027 else
26028 return false;
26030 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26031 return false;
26033 /* This test is true if the dependent insn reads the flags but
26034 not any other potentially set register. */
26035 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26036 return false;
26038 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26039 return false;
26041 return true;
26044 /* Return true iff USE_INSN has a memory address with operands set by
26045 SET_INSN. */
26047 bool
26048 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26050 int i;
26051 extract_insn_cached (use_insn);
26052 for (i = recog_data.n_operands - 1; i >= 0; --i)
26053 if (MEM_P (recog_data.operand[i]))
26055 rtx addr = XEXP (recog_data.operand[i], 0);
26056 return modified_in_p (addr, set_insn) != 0;
26058 return false;
26061 /* Helper function for exact_store_load_dependency.
26062 Return true if addr is found in insn. */
26063 static bool
26064 exact_dependency_1 (rtx addr, rtx insn)
26066 enum rtx_code code;
26067 const char *format_ptr;
26068 int i, j;
26070 code = GET_CODE (insn);
26071 switch (code)
26073 case MEM:
26074 if (rtx_equal_p (addr, insn))
26075 return true;
26076 break;
26077 case REG:
26078 CASE_CONST_ANY:
26079 case SYMBOL_REF:
26080 case CODE_LABEL:
26081 case PC:
26082 case CC0:
26083 case EXPR_LIST:
26084 return false;
26085 default:
26086 break;
26089 format_ptr = GET_RTX_FORMAT (code);
26090 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26092 switch (*format_ptr++)
26094 case 'e':
26095 if (exact_dependency_1 (addr, XEXP (insn, i)))
26096 return true;
26097 break;
26098 case 'E':
26099 for (j = 0; j < XVECLEN (insn, i); j++)
26100 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26101 return true;
26102 break;
26105 return false;
26108 /* Return true if there exists exact dependency for store & load, i.e.
26109 the same memory address is used in them. */
26110 static bool
26111 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26113 rtx set1, set2;
26115 set1 = single_set (store);
26116 if (!set1)
26117 return false;
26118 if (!MEM_P (SET_DEST (set1)))
26119 return false;
26120 set2 = single_set (load);
26121 if (!set2)
26122 return false;
26123 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26124 return true;
26125 return false;
26128 static int
26129 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26131 enum attr_type insn_type, dep_insn_type;
26132 enum attr_memory memory;
26133 rtx set, set2;
26134 int dep_insn_code_number;
26136 /* Anti and output dependencies have zero cost on all CPUs. */
26137 if (REG_NOTE_KIND (link) != 0)
26138 return 0;
26140 dep_insn_code_number = recog_memoized (dep_insn);
26142 /* If we can't recognize the insns, we can't really do anything. */
26143 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26144 return cost;
26146 insn_type = get_attr_type (insn);
26147 dep_insn_type = get_attr_type (dep_insn);
26149 switch (ix86_tune)
26151 case PROCESSOR_PENTIUM:
26152 /* Address Generation Interlock adds a cycle of latency. */
26153 if (insn_type == TYPE_LEA)
26155 rtx addr = PATTERN (insn);
26157 if (GET_CODE (addr) == PARALLEL)
26158 addr = XVECEXP (addr, 0, 0);
26160 gcc_assert (GET_CODE (addr) == SET);
26162 addr = SET_SRC (addr);
26163 if (modified_in_p (addr, dep_insn))
26164 cost += 1;
26166 else if (ix86_agi_dependent (dep_insn, insn))
26167 cost += 1;
26169 /* ??? Compares pair with jump/setcc. */
26170 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26171 cost = 0;
26173 /* Floating point stores require value to be ready one cycle earlier. */
26174 if (insn_type == TYPE_FMOV
26175 && get_attr_memory (insn) == MEMORY_STORE
26176 && !ix86_agi_dependent (dep_insn, insn))
26177 cost += 1;
26178 break;
26180 case PROCESSOR_PENTIUMPRO:
26181 /* INT->FP conversion is expensive. */
26182 if (get_attr_fp_int_src (dep_insn))
26183 cost += 5;
26185 /* There is one cycle extra latency between an FP op and a store. */
26186 if (insn_type == TYPE_FMOV
26187 && (set = single_set (dep_insn)) != NULL_RTX
26188 && (set2 = single_set (insn)) != NULL_RTX
26189 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26190 && MEM_P (SET_DEST (set2)))
26191 cost += 1;
26193 memory = get_attr_memory (insn);
26195 /* Show the ability of the reorder buffer to hide the latency of a load by
26196 executing it in parallel with the previous instruction when the
26197 previous instruction is not needed to compute the address. */
26198 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26199 && !ix86_agi_dependent (dep_insn, insn))
26201 /* Claim moves to take one cycle, as the core can issue one load
26202 at a time and the next load can start a cycle later. */
26203 if (dep_insn_type == TYPE_IMOV
26204 || dep_insn_type == TYPE_FMOV)
26205 cost = 1;
26206 else if (cost > 1)
26207 cost--;
26209 break;
26211 case PROCESSOR_K6:
26212 /* The esp dependency is resolved before
26213 the instruction is really finished. */
26214 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26215 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26216 return 1;
26218 /* INT->FP conversion is expensive. */
26219 if (get_attr_fp_int_src (dep_insn))
26220 cost += 5;
26222 memory = get_attr_memory (insn);
26224 /* Show the ability of the reorder buffer to hide the latency of a load by
26225 executing it in parallel with the previous instruction when the
26226 previous instruction is not needed to compute the address. */
26227 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26228 && !ix86_agi_dependent (dep_insn, insn))
26230 /* Claim moves to take one cycle, as the core can issue one load
26231 at a time and the next load can start a cycle later. */
26232 if (dep_insn_type == TYPE_IMOV
26233 || dep_insn_type == TYPE_FMOV)
26234 cost = 1;
26235 else if (cost > 2)
26236 cost -= 2;
26237 else
26238 cost = 1;
26240 break;
26242 case PROCESSOR_AMDFAM10:
26243 case PROCESSOR_BDVER1:
26244 case PROCESSOR_BDVER2:
26245 case PROCESSOR_BDVER3:
26246 case PROCESSOR_BDVER4:
26247 case PROCESSOR_BTVER1:
26248 case PROCESSOR_BTVER2:
26249 case PROCESSOR_GENERIC:
26250 /* The stack engine allows push&pop instructions to execute in parallel. */
26251 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26252 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26253 return 0;
26254 /* FALLTHRU */
26256 case PROCESSOR_ATHLON:
26257 case PROCESSOR_K8:
26258 memory = get_attr_memory (insn);
26260 /* Show the ability of the reorder buffer to hide the latency of a load by
26261 executing it in parallel with the previous instruction when the
26262 previous instruction is not needed to compute the address. */
26263 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26264 && !ix86_agi_dependent (dep_insn, insn))
26266 enum attr_unit unit = get_attr_unit (insn);
26267 int loadcost = 3;
26269 /* Because of the difference between the length of integer and
26270 floating unit pipeline preparation stages, the memory operands
26271 for floating point are cheaper.
26273 ??? For Athlon the difference is most probably 2. */
26274 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26275 loadcost = 3;
26276 else
26277 loadcost = TARGET_ATHLON ? 2 : 0;
26279 if (cost >= loadcost)
26280 cost -= loadcost;
26281 else
26282 cost = 0;
26284 break;
26286 case PROCESSOR_CORE2:
26287 case PROCESSOR_NEHALEM:
26288 case PROCESSOR_SANDYBRIDGE:
26289 case PROCESSOR_HASWELL:
26290 /* The stack engine allows push&pop instructions to execute in parallel. */
26291 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26292 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26293 return 0;
26295 memory = get_attr_memory (insn);
26297 /* Show the ability of the reorder buffer to hide the latency of a load by
26298 executing it in parallel with the previous instruction when the
26299 previous instruction is not needed to compute the address. */
26300 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26301 && !ix86_agi_dependent (dep_insn, insn))
26303 if (cost >= 4)
26304 cost -= 4;
26305 else
26306 cost = 0;
26308 break;
26310 case PROCESSOR_SILVERMONT:
26311 case PROCESSOR_KNL:
26312 case PROCESSOR_INTEL:
26313 if (!reload_completed)
26314 return cost;
26316 /* Increase cost of integer loads. */
26317 memory = get_attr_memory (dep_insn);
26318 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26320 enum attr_unit unit = get_attr_unit (dep_insn);
26321 if (unit == UNIT_INTEGER && cost == 1)
26323 if (memory == MEMORY_LOAD)
26324 cost = 3;
26325 else
26327 /* Increase cost of ld/st for short int types only
26328 because of the store-forwarding issue. */
26329 rtx set = single_set (dep_insn);
26330 if (set && (GET_MODE (SET_DEST (set)) == QImode
26331 || GET_MODE (SET_DEST (set)) == HImode))
26333 /* Increase the cost of the store/load pair if an exact
26334 dependence exists and this is the load insn. */
26335 enum attr_memory insn_memory = get_attr_memory (insn);
26336 if (insn_memory == MEMORY_LOAD
26337 && exact_store_load_dependency (dep_insn, insn))
26338 cost = 3;
26344 default:
26345 break;
26348 return cost;
26351 /* How many alternative schedules to try. This should be as wide as the
26352 scheduling freedom in the DFA, but no wider. Making this value too
26353 large results in extra work for the scheduler. */
26355 static int
26356 ia32_multipass_dfa_lookahead (void)
26358 switch (ix86_tune)
26360 case PROCESSOR_PENTIUM:
26361 return 2;
26363 case PROCESSOR_PENTIUMPRO:
26364 case PROCESSOR_K6:
26365 return 1;
26367 case PROCESSOR_BDVER1:
26368 case PROCESSOR_BDVER2:
26369 case PROCESSOR_BDVER3:
26370 case PROCESSOR_BDVER4:
26371 /* We use lookahead value 4 for BD both before and after reload
26372 schedules. Plan is to have value 8 included for O3. */
26373 return 4;
26375 case PROCESSOR_CORE2:
26376 case PROCESSOR_NEHALEM:
26377 case PROCESSOR_SANDYBRIDGE:
26378 case PROCESSOR_HASWELL:
26379 case PROCESSOR_BONNELL:
26380 case PROCESSOR_SILVERMONT:
26381 case PROCESSOR_KNL:
26382 case PROCESSOR_INTEL:
26383 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26384 as the number of instructions that can be executed in a cycle, i.e.,
26385 issue_rate. I wonder why tuning for many CPUs does not do this. */
26386 if (reload_completed)
26387 return ix86_issue_rate ();
26388 /* Don't use lookahead for pre-reload schedule to save compile time. */
26389 return 0;
26391 default:
26392 return 0;
26396 /* Return true if target platform supports macro-fusion. */
26398 static bool
26399 ix86_macro_fusion_p ()
26401 return TARGET_FUSE_CMP_AND_BRANCH;
26404 /* Check whether the current microarchitecture supports macro fusion
26405 for the insn pair "CONDGEN + CONDJMP". Refer to
26406 "Intel Architectures Optimization Reference Manual". */
26408 static bool
26409 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26411 rtx src, dest;
26412 enum rtx_code ccode;
26413 rtx compare_set = NULL_RTX, test_if, cond;
26414 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26416 if (!any_condjump_p (condjmp))
26417 return false;
26419 if (get_attr_type (condgen) != TYPE_TEST
26420 && get_attr_type (condgen) != TYPE_ICMP
26421 && get_attr_type (condgen) != TYPE_INCDEC
26422 && get_attr_type (condgen) != TYPE_ALU)
26423 return false;
26425 compare_set = single_set (condgen);
26426 if (compare_set == NULL_RTX
26427 && !TARGET_FUSE_ALU_AND_BRANCH)
26428 return false;
26430 if (compare_set == NULL_RTX)
26432 int i;
26433 rtx pat = PATTERN (condgen);
26434 for (i = 0; i < XVECLEN (pat, 0); i++)
26435 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26437 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26438 if (GET_CODE (set_src) == COMPARE)
26439 compare_set = XVECEXP (pat, 0, i);
26440 else
26441 alu_set = XVECEXP (pat, 0, i);
26444 if (compare_set == NULL_RTX)
26445 return false;
26446 src = SET_SRC (compare_set);
26447 if (GET_CODE (src) != COMPARE)
26448 return false;
26450 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26451 supported. */
26452 if ((MEM_P (XEXP (src, 0))
26453 && CONST_INT_P (XEXP (src, 1)))
26454 || (MEM_P (XEXP (src, 1))
26455 && CONST_INT_P (XEXP (src, 0))))
26456 return false;
26458 /* No fusion for RIP-relative address. */
26459 if (MEM_P (XEXP (src, 0)))
26460 addr = XEXP (XEXP (src, 0), 0);
26461 else if (MEM_P (XEXP (src, 1)))
26462 addr = XEXP (XEXP (src, 1), 0);
26464 if (addr) {
26465 ix86_address parts;
26466 int ok = ix86_decompose_address (addr, &parts);
26467 gcc_assert (ok);
26469 if (rip_relative_addr_p (&parts))
26470 return false;
26473 test_if = SET_SRC (pc_set (condjmp));
26474 cond = XEXP (test_if, 0);
26475 ccode = GET_CODE (cond);
26476 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26477 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26478 && (ccode == GE
26479 || ccode == GT
26480 || ccode == LE
26481 || ccode == LT))
26482 return false;
26484 /* Return true for TYPE_TEST and TYPE_ICMP. */
26485 if (get_attr_type (condgen) == TYPE_TEST
26486 || get_attr_type (condgen) == TYPE_ICMP)
26487 return true;
26489 /* The following handles the case of macro-fusion for alu + jmp. */
26490 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26491 return false;
26493 /* No fusion for alu op with memory destination operand. */
26494 dest = SET_DEST (alu_set);
26495 if (MEM_P (dest))
26496 return false;
26498 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26499 supported. */
26500 if (get_attr_type (condgen) == TYPE_INCDEC
26501 && (ccode == GEU
26502 || ccode == GTU
26503 || ccode == LEU
26504 || ccode == LTU))
26505 return false;
26507 return true;
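/* Editor's illustration (a sketch, not part of the hook above): with the
   checks in ix86_macro_fusion_pair_p, a pair such as

       cmpq  %rsi, %rdi        # TYPE_ICMP, register operands
       jne   .L2

   is reported as fusible, while

       cmpl  $0, (%rdi)        # MEM-IMM compare
       jne   .L2

   is rejected by the MEM/CONST_INT test, and an inc/dec paired with an
   unsigned condition (GEU/GTU/LEU/LTU) is rejected by the TYPE_INCDEC
   check.  AT&T syntax assumed; the label is hypothetical.  */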
26510 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
26511 execution. It is applied if
26512 (1) An IMUL instruction is on the top of the list;
26513 (2) The ready list contains an insn that is the sole producer of an
26514 independent IMUL instruction.
26515 Return the index of the IMUL producer if it was found and -1 otherwise. */
26516 static int
26517 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26519 rtx_insn *insn;
26520 rtx set, insn1, insn2;
26521 sd_iterator_def sd_it;
26522 dep_t dep;
26523 int index = -1;
26524 int i;
26526 if (!TARGET_BONNELL)
26527 return index;
26529 /* Check that IMUL instruction is on the top of ready list. */
26530 insn = ready[n_ready - 1];
26531 set = single_set (insn);
26532 if (!set)
26533 return index;
26534 if (!(GET_CODE (SET_SRC (set)) == MULT
26535 && GET_MODE (SET_SRC (set)) == SImode))
26536 return index;
26538 /* Search for producer of independent IMUL instruction. */
26539 for (i = n_ready - 2; i >= 0; i--)
26541 insn = ready[i];
26542 if (!NONDEBUG_INSN_P (insn))
26543 continue;
26544 /* Skip IMUL instruction. */
26545 insn2 = PATTERN (insn);
26546 if (GET_CODE (insn2) == PARALLEL)
26547 insn2 = XVECEXP (insn2, 0, 0);
26548 if (GET_CODE (insn2) == SET
26549 && GET_CODE (SET_SRC (insn2)) == MULT
26550 && GET_MODE (SET_SRC (insn2)) == SImode)
26551 continue;
26553 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26555 rtx con;
26556 con = DEP_CON (dep);
26557 if (!NONDEBUG_INSN_P (con))
26558 continue;
26559 insn1 = PATTERN (con);
26560 if (GET_CODE (insn1) == PARALLEL)
26561 insn1 = XVECEXP (insn1, 0, 0);
26563 if (GET_CODE (insn1) == SET
26564 && GET_CODE (SET_SRC (insn1)) == MULT
26565 && GET_MODE (SET_SRC (insn1)) == SImode)
26567 sd_iterator_def sd_it1;
26568 dep_t dep1;
26569 /* Check that the IMUL has no producer other than INSN. */
26570 index = i;
26571 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26573 rtx pro;
26574 pro = DEP_PRO (dep1);
26575 if (!NONDEBUG_INSN_P (pro))
26576 continue;
26577 if (pro != insn)
26578 index = -1;
26580 if (index >= 0)
26581 break;
26584 if (index >= 0)
26585 break;
26587 return index;
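/* Editor's note on do_reorder_for_imul above: the scan skips other IMULs in
   the ready list and accepts an insn only if one of its SImode MULT
   consumers has no producer other than that insn, so the producer can be
   issued immediately before the IMUL on the Bonnell pipeline.  */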
26590 /* Try to find the best candidate for the top of the ready list if two insns
26591 have the same priority - the candidate is best if its dependees were
26592 scheduled earlier. Applied for Silvermont and generic Intel tuning only.
26593 Return true if the top 2 insns must be interchanged. */
26594 static bool
26595 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26597 rtx_insn *top = ready[n_ready - 1];
26598 rtx_insn *next = ready[n_ready - 2];
26599 rtx set;
26600 sd_iterator_def sd_it;
26601 dep_t dep;
26602 int clock1 = -1;
26603 int clock2 = -1;
26604 #define INSN_TICK(INSN) (HID (INSN)->tick)
26606 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26607 return false;
26609 if (!NONDEBUG_INSN_P (top))
26610 return false;
26611 if (!NONJUMP_INSN_P (top))
26612 return false;
26613 if (!NONDEBUG_INSN_P (next))
26614 return false;
26615 if (!NONJUMP_INSN_P (next))
26616 return false;
26617 set = single_set (top);
26618 if (!set)
26619 return false;
26620 set = single_set (next);
26621 if (!set)
26622 return false;
26624 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26626 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26627 return false;
26628 /* Determine the winner more precisely. */
26629 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26631 rtx pro;
26632 pro = DEP_PRO (dep);
26633 if (!NONDEBUG_INSN_P (pro))
26634 continue;
26635 if (INSN_TICK (pro) > clock1)
26636 clock1 = INSN_TICK (pro);
26638 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26640 rtx pro;
26641 pro = DEP_PRO (dep);
26642 if (!NONDEBUG_INSN_P (pro))
26643 continue;
26644 if (INSN_TICK (pro) > clock2)
26645 clock2 = INSN_TICK (pro);
26648 if (clock1 == clock2)
26650 /* Determine winner - load must win. */
26651 enum attr_memory memory1, memory2;
26652 memory1 = get_attr_memory (top);
26653 memory2 = get_attr_memory (next);
26654 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26655 return true;
26657 return (bool) (clock2 < clock1);
26659 return false;
26660 #undef INSN_TICK
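/* Editor's note: in swap_top_of_ready_list above, a swap is requested when
   the producers of NEXT finished on an earlier cycle than those of TOP
   (clock2 < clock1), or, on a tie, when NEXT is a load and TOP is not.  */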
26663 /* Perform possible reordering of the ready list, for Atom/Silvermont only.
26664 Return the issue rate. */
26665 static int
26666 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26667 int *pn_ready, int clock_var)
26669 int issue_rate = -1;
26670 int n_ready = *pn_ready;
26671 int i;
26672 rtx_insn *insn;
26673 int index = -1;
26675 /* Set up issue rate. */
26676 issue_rate = ix86_issue_rate ();
26678 /* Do reordering for BONNELL/SILVERMONT only. */
26679 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26680 return issue_rate;
26682 /* Nothing to do if ready list contains only 1 instruction. */
26683 if (n_ready <= 1)
26684 return issue_rate;
26686 /* Do reordering for the post-reload scheduler only. */
26687 if (!reload_completed)
26688 return issue_rate;
26690 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26692 if (sched_verbose > 1)
26693 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26694 INSN_UID (ready[index]));
26696 /* Put IMUL producer (ready[index]) at the top of ready list. */
26697 insn = ready[index];
26698 for (i = index; i < n_ready - 1; i++)
26699 ready[i] = ready[i + 1];
26700 ready[n_ready - 1] = insn;
26701 return issue_rate;
26704 /* Skip selective scheduling since HID is not populated in it. */
26705 if (clock_var != 0
26706 && !sel_sched_p ()
26707 && swap_top_of_ready_list (ready, n_ready))
26709 if (sched_verbose > 1)
26710 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26711 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26712 /* Swap 2 top elements of ready list. */
26713 insn = ready[n_ready - 1];
26714 ready[n_ready - 1] = ready[n_ready - 2];
26715 ready[n_ready - 2] = insn;
26717 return issue_rate;
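/* Editor's sketch: the IMUL reordering above is a simple rotation of the
   ready array, where element n_ready - 1 is the "top".  A minimal
   self-contained model of that rotation, using a plain pointer array
   instead of rtx_insn (hypothetical helper, kept out of the build):  */
#if 0
static void
rotate_to_top (void **ready, int n_ready, int index)
{
  /* Save the chosen element, shift the tail down, put it on top.  */
  void *chosen = ready[index];
  for (int i = index; i < n_ready - 1; i++)
    ready[i] = ready[i + 1];
  ready[n_ready - 1] = chosen;
}
#endif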
26720 static bool
26721 ix86_class_likely_spilled_p (reg_class_t);
26723 /* Return true if the lhs of INSN is a HW function argument register, and
26724 set IS_SPILLED to true if it is a likely-spilled HW register. */
26725 static bool
26726 insn_is_function_arg (rtx insn, bool* is_spilled)
26728 rtx dst;
26730 if (!NONDEBUG_INSN_P (insn))
26731 return false;
26732 /* Call instructions are not movable; ignore them. */
26733 if (CALL_P (insn))
26734 return false;
26735 insn = PATTERN (insn);
26736 if (GET_CODE (insn) == PARALLEL)
26737 insn = XVECEXP (insn, 0, 0);
26738 if (GET_CODE (insn) != SET)
26739 return false;
26740 dst = SET_DEST (insn);
26741 if (REG_P (dst) && HARD_REGISTER_P (dst)
26742 && ix86_function_arg_regno_p (REGNO (dst)))
26744 /* Is it likely spilled HW register? */
26745 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26746 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26747 *is_spilled = true;
26748 return true;
26750 return false;
26753 /* Add output dependencies for a chain of adjacent function arguments, but
26754 only if there is a move to a likely-spilled HW register. Return the first
26755 argument if at least one dependence was added, or NULL otherwise. */
26756 static rtx_insn *
26757 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26759 rtx_insn *insn;
26760 rtx_insn *last = call;
26761 rtx_insn *first_arg = NULL;
26762 bool is_spilled = false;
26764 head = PREV_INSN (head);
26766 /* Find the argument-passing instruction nearest to the call. */
26767 while (true)
26769 last = PREV_INSN (last);
26770 if (last == head)
26771 return NULL;
26772 if (!NONDEBUG_INSN_P (last))
26773 continue;
26774 if (insn_is_function_arg (last, &is_spilled))
26775 break;
26776 return NULL;
26779 first_arg = last;
26780 while (true)
26782 insn = PREV_INSN (last);
26783 if (!INSN_P (insn))
26784 break;
26785 if (insn == head)
26786 break;
26787 if (!NONDEBUG_INSN_P (insn))
26789 last = insn;
26790 continue;
26792 if (insn_is_function_arg (insn, &is_spilled))
26794 /* Add an output dependence between two function arguments if the chain
26795 of output arguments contains likely-spilled HW registers. */
26796 if (is_spilled)
26797 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26798 first_arg = last = insn;
26800 else
26801 break;
26803 if (!is_spilled)
26804 return NULL;
26805 return first_arg;
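/* Editor's sketch of the pattern handled above (hypothetical operands):
   for a call preceded by argument set-up such as

       movl  $1, %edi        # arg 1 in a likely-spilled hard reg
       movl  $2, %esi        # arg 2 in a likely-spilled hard reg
       call  foo

   an output dependence is added between adjacent argument moves, which
   restricts how far the pre-reload scheduler may move them and keeps the
   argument set-up chain near the call.  */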
26808 /* Add output or anti dependency from insn to first_arg to restrict its code
26809 motion. */
26810 static void
26811 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26813 rtx set;
26814 rtx tmp;
26816 /* Add anti dependencies for bounds stores. */
26817 if (INSN_P (insn)
26818 && GET_CODE (PATTERN (insn)) == PARALLEL
26819 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
26820 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
26822 add_dependence (first_arg, insn, REG_DEP_ANTI);
26823 return;
26826 set = single_set (insn);
26827 if (!set)
26828 return;
26829 tmp = SET_DEST (set);
26830 if (REG_P (tmp))
26832 /* Add output dependency to the first function argument. */
26833 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26834 return;
26836 /* Add anti dependency. */
26837 add_dependence (first_arg, insn, REG_DEP_ANTI);
26840 /* Avoid cross-block motion of a function argument by adding a dependency
26841 from the first non-jump instruction in bb. */
26842 static void
26843 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26845 rtx_insn *insn = BB_END (bb);
26847 while (insn)
26849 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26851 rtx set = single_set (insn);
26852 if (set)
26854 avoid_func_arg_motion (arg, insn);
26855 return;
26858 if (insn == BB_HEAD (bb))
26859 return;
26860 insn = PREV_INSN (insn);
26864 /* Hook for pre-reload schedule - avoid motion of function arguments
26865 passed in likely spilled HW registers. */
26866 static void
26867 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26869 rtx_insn *insn;
26870 rtx_insn *first_arg = NULL;
26871 if (reload_completed)
26872 return;
26873 while (head != tail && DEBUG_INSN_P (head))
26874 head = NEXT_INSN (head);
26875 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26876 if (INSN_P (insn) && CALL_P (insn))
26878 first_arg = add_parameter_dependencies (insn, head);
26879 if (first_arg)
26881 /* Add a dependee for the first argument to predecessors, but only
26882 if the region contains more than one block. */
26883 basic_block bb = BLOCK_FOR_INSN (insn);
26884 int rgn = CONTAINING_RGN (bb->index);
26885 int nr_blks = RGN_NR_BLOCKS (rgn);
26886 /* Skip trivial regions and region head blocks that can have
26887 predecessors outside of region. */
26888 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26890 edge e;
26891 edge_iterator ei;
26893 /* Regions are SCCs with the exception of selective
26894 scheduling with pipelining of outer blocks enabled.
26895 So also check that immediate predecessors of a non-head
26896 block are in the same region. */
26897 FOR_EACH_EDGE (e, ei, bb->preds)
26899 /* Avoid creating loop-carried dependencies by using
26900 the topological ordering in the region. */
26901 if (rgn == CONTAINING_RGN (e->src->index)
26902 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26903 add_dependee_for_func_arg (first_arg, e->src);
26906 insn = first_arg;
26907 if (insn == head)
26908 break;
26911 else if (first_arg)
26912 avoid_func_arg_motion (first_arg, insn);
26915 /* Hook for pre-reload schedule - set the priority of moves from likely-spilled
26916 HW registers to the maximum, to schedule them as soon as possible. These are
26917 moves from function argument registers at the top of the function entry
26918 and moves from function return value registers after a call. */
26919 static int
26920 ix86_adjust_priority (rtx_insn *insn, int priority)
26922 rtx set;
26924 if (reload_completed)
26925 return priority;
26927 if (!NONDEBUG_INSN_P (insn))
26928 return priority;
26930 set = single_set (insn);
26931 if (set)
26933 rtx tmp = SET_SRC (set);
26934 if (REG_P (tmp)
26935 && HARD_REGISTER_P (tmp)
26936 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26937 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26938 return current_sched_info->sched_max_insns_priority;
26941 return priority;
26944 /* Model the decoder of Core 2/i7.
26945 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
26946 track the instruction fetch block boundaries and make sure that long
26947 (9+ byte) instructions are assigned to D0. */
26949 /* Maximum length of an insn that can be handled by
26950 a secondary decoder unit. '8' for Core 2/i7. */
26951 static int core2i7_secondary_decoder_max_insn_size;
26953 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26954 '16' for Core 2/i7. */
26955 static int core2i7_ifetch_block_size;
26957 /* Maximum number of instructions decoder can handle per cycle.
26958 '6' for Core 2/i7. */
26959 static int core2i7_ifetch_block_max_insns;
26961 typedef struct ix86_first_cycle_multipass_data_ *
26962 ix86_first_cycle_multipass_data_t;
26963 typedef const struct ix86_first_cycle_multipass_data_ *
26964 const_ix86_first_cycle_multipass_data_t;
26966 /* A variable to store target state across calls to max_issue within
26967 one cycle. */
26968 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26969 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26971 /* Initialize DATA. */
26972 static void
26973 core2i7_first_cycle_multipass_init (void *_data)
26975 ix86_first_cycle_multipass_data_t data
26976 = (ix86_first_cycle_multipass_data_t) _data;
26978 data->ifetch_block_len = 0;
26979 data->ifetch_block_n_insns = 0;
26980 data->ready_try_change = NULL;
26981 data->ready_try_change_size = 0;
26984 /* Advancing the cycle; reset ifetch block counts. */
26985 static void
26986 core2i7_dfa_post_advance_cycle (void)
26988 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26990 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26992 data->ifetch_block_len = 0;
26993 data->ifetch_block_n_insns = 0;
26996 static int min_insn_size (rtx_insn *);
26998 /* Filter out insns from ready_try that the core will not be able to issue
26999 on the current cycle due to decoder restrictions. */
27000 static void
27001 core2i7_first_cycle_multipass_filter_ready_try
27002 (const_ix86_first_cycle_multipass_data_t data,
27003 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
27005 while (n_ready--)
27007 rtx_insn *insn;
27008 int insn_size;
27010 if (ready_try[n_ready])
27011 continue;
27013 insn = get_ready_element (n_ready);
27014 insn_size = min_insn_size (insn);
27016 if (/* If this is too long an insn for a secondary decoder ... */
27017 (!first_cycle_insn_p
27018 && insn_size > core2i7_secondary_decoder_max_insn_size)
27019 /* ... or it would not fit into the ifetch block ... */
27020 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
27021 /* ... or the decoder is full already ... */
27022 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
27023 /* ... mask the insn out. */
27025 ready_try[n_ready] = 1;
27027 if (data->ready_try_change)
27028 bitmap_set_bit (data->ready_try_change, n_ready);
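/* Editor's worked example for the filter above, assuming the Core 2/i7
   parameters set up in ix86_sched_init_global (16-byte ifetch block, at
   most 6 insns per block, secondary decoders limited to 8-byte insns):
   if 7 + 7 bytes are already accounted to the current fetch block, a
   ready insn of 3 bytes would overflow the 16-byte block and is masked
   out; a 9-byte insn is masked out whenever it is not the first insn
   issued in the cycle.  */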
27033 /* Prepare for a new round of multipass lookahead scheduling. */
27034 static void
27035 core2i7_first_cycle_multipass_begin (void *_data,
27036 signed char *ready_try, int n_ready,
27037 bool first_cycle_insn_p)
27039 ix86_first_cycle_multipass_data_t data
27040 = (ix86_first_cycle_multipass_data_t) _data;
27041 const_ix86_first_cycle_multipass_data_t prev_data
27042 = ix86_first_cycle_multipass_data;
27044 /* Restore the state from the end of the previous round. */
27045 data->ifetch_block_len = prev_data->ifetch_block_len;
27046 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27048 /* Filter instructions that cannot be issued on current cycle due to
27049 decoder restrictions. */
27050 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27051 first_cycle_insn_p);
27054 /* INSN is being issued in current solution. Account for its impact on
27055 the decoder model. */
27056 static void
27057 core2i7_first_cycle_multipass_issue (void *_data,
27058 signed char *ready_try, int n_ready,
27059 rtx_insn *insn, const void *_prev_data)
27061 ix86_first_cycle_multipass_data_t data
27062 = (ix86_first_cycle_multipass_data_t) _data;
27063 const_ix86_first_cycle_multipass_data_t prev_data
27064 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27066 int insn_size = min_insn_size (insn);
27068 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27069 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27070 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27071 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27073 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27074 if (!data->ready_try_change)
27076 data->ready_try_change = sbitmap_alloc (n_ready);
27077 data->ready_try_change_size = n_ready;
27079 else if (data->ready_try_change_size < n_ready)
27081 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27082 n_ready, 0);
27083 data->ready_try_change_size = n_ready;
27085 bitmap_clear (data->ready_try_change);
27087 /* Filter out insns from ready_try that the core will not be able to issue
27088 on the current cycle due to decoder restrictions. */
27089 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27090 false);
27093 /* Revert the effect on ready_try. */
27094 static void
27095 core2i7_first_cycle_multipass_backtrack (const void *_data,
27096 signed char *ready_try,
27097 int n_ready ATTRIBUTE_UNUSED)
27099 const_ix86_first_cycle_multipass_data_t data
27100 = (const_ix86_first_cycle_multipass_data_t) _data;
27101 unsigned int i = 0;
27102 sbitmap_iterator sbi;
27104 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27105 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27107 ready_try[i] = 0;
27111 /* Save the result of multipass lookahead scheduling for the next round. */
27112 static void
27113 core2i7_first_cycle_multipass_end (const void *_data)
27115 const_ix86_first_cycle_multipass_data_t data
27116 = (const_ix86_first_cycle_multipass_data_t) _data;
27117 ix86_first_cycle_multipass_data_t next_data
27118 = ix86_first_cycle_multipass_data;
27120 if (data != NULL)
27122 next_data->ifetch_block_len = data->ifetch_block_len;
27123 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27127 /* Deallocate target data. */
27128 static void
27129 core2i7_first_cycle_multipass_fini (void *_data)
27131 ix86_first_cycle_multipass_data_t data
27132 = (ix86_first_cycle_multipass_data_t) _data;
27134 if (data->ready_try_change)
27136 sbitmap_free (data->ready_try_change);
27137 data->ready_try_change = NULL;
27138 data->ready_try_change_size = 0;
27142 /* Prepare for scheduling pass. */
27143 static void
27144 ix86_sched_init_global (FILE *, int, int)
27146 /* Install scheduling hooks for current CPU. Some of these hooks are used
27147 in time-critical parts of the scheduler, so we only set them up when
27148 they are actually used. */
27149 switch (ix86_tune)
27151 case PROCESSOR_CORE2:
27152 case PROCESSOR_NEHALEM:
27153 case PROCESSOR_SANDYBRIDGE:
27154 case PROCESSOR_HASWELL:
27155 /* Do not perform multipass scheduling for pre-reload schedule
27156 to save compile time. */
27157 if (reload_completed)
27159 targetm.sched.dfa_post_advance_cycle
27160 = core2i7_dfa_post_advance_cycle;
27161 targetm.sched.first_cycle_multipass_init
27162 = core2i7_first_cycle_multipass_init;
27163 targetm.sched.first_cycle_multipass_begin
27164 = core2i7_first_cycle_multipass_begin;
27165 targetm.sched.first_cycle_multipass_issue
27166 = core2i7_first_cycle_multipass_issue;
27167 targetm.sched.first_cycle_multipass_backtrack
27168 = core2i7_first_cycle_multipass_backtrack;
27169 targetm.sched.first_cycle_multipass_end
27170 = core2i7_first_cycle_multipass_end;
27171 targetm.sched.first_cycle_multipass_fini
27172 = core2i7_first_cycle_multipass_fini;
27174 /* Set decoder parameters. */
27175 core2i7_secondary_decoder_max_insn_size = 8;
27176 core2i7_ifetch_block_size = 16;
27177 core2i7_ifetch_block_max_insns = 6;
27178 break;
27180 /* ... Fall through ... */
27181 default:
27182 targetm.sched.dfa_post_advance_cycle = NULL;
27183 targetm.sched.first_cycle_multipass_init = NULL;
27184 targetm.sched.first_cycle_multipass_begin = NULL;
27185 targetm.sched.first_cycle_multipass_issue = NULL;
27186 targetm.sched.first_cycle_multipass_backtrack = NULL;
27187 targetm.sched.first_cycle_multipass_end = NULL;
27188 targetm.sched.first_cycle_multipass_fini = NULL;
27189 break;
27194 /* Compute the alignment given to a constant that is being placed in memory.
27195 EXP is the constant and ALIGN is the alignment that the object would
27196 ordinarily have.
27197 The value of this function is used instead of that alignment to align
27198 the object. */
27200 int
27201 ix86_constant_alignment (tree exp, int align)
27203 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27204 || TREE_CODE (exp) == INTEGER_CST)
27206 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27207 return 64;
27208 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27209 return 128;
27211 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27212 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27213 return BITS_PER_WORD;
27215 return align;
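/* Editor's example for the function above: a DFmode constant (e.g. a
   'double' literal) placed in memory gets at least 64-bit alignment, and,
   unless optimizing for size, a string constant whose TREE_STRING_LENGTH
   is at least 31 is aligned to BITS_PER_WORD.  */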
27218 /* Compute the alignment for a static variable.
27219 TYPE is the data type, and ALIGN is the alignment that
27220 the object would ordinarily have. The value of this function is used
27221 instead of that alignment to align the object. */
27223 int
27224 ix86_data_alignment (tree type, int align, bool opt)
27226 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27227 for symbols from other compilation units or symbols that don't need
27228 to bind locally. In order to preserve some ABI compatibility with
27229 those compilers, ensure we don't decrease alignment from what we
27230 used to assume. */
27232 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27234 /* A data structure equal to or greater than the size of a cache line
27235 (64 bytes in the Pentium 4 and other recent Intel processors, including
27236 processors based on the Intel Core microarchitecture) should be aligned
27237 so that its base address is a multiple of the cache line size. */
27239 int max_align
27240 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27242 if (max_align < BITS_PER_WORD)
27243 max_align = BITS_PER_WORD;
27245 switch (ix86_align_data_type)
27247 case ix86_align_data_type_abi: opt = false; break;
27248 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27249 case ix86_align_data_type_cacheline: break;
27252 if (opt
27253 && AGGREGATE_TYPE_P (type)
27254 && TYPE_SIZE (type)
27255 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27257 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27258 && align < max_align_compat)
27259 align = max_align_compat;
27260 if (wi::geu_p (TYPE_SIZE (type), max_align)
27261 && align < max_align)
27262 align = max_align;
27265 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27266 to a 16-byte boundary. */
27267 if (TARGET_64BIT)
27269 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27270 && TYPE_SIZE (type)
27271 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27272 && wi::geu_p (TYPE_SIZE (type), 128)
27273 && align < 128)
27274 return 128;
27277 if (!opt)
27278 return align;
27280 if (TREE_CODE (type) == ARRAY_TYPE)
27282 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27283 return 64;
27284 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27285 return 128;
27287 else if (TREE_CODE (type) == COMPLEX_TYPE)
27290 if (TYPE_MODE (type) == DCmode && align < 64)
27291 return 64;
27292 if ((TYPE_MODE (type) == XCmode
27293 || TYPE_MODE (type) == TCmode) && align < 128)
27294 return 128;
27296 else if ((TREE_CODE (type) == RECORD_TYPE
27297 || TREE_CODE (type) == UNION_TYPE
27298 || TREE_CODE (type) == QUAL_UNION_TYPE)
27299 && TYPE_FIELDS (type))
27301 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27302 return 64;
27303 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27304 return 128;
27306 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27307 || TREE_CODE (type) == INTEGER_TYPE)
27309 if (TYPE_MODE (type) == DFmode && align < 64)
27310 return 64;
27311 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27312 return 128;
27315 return align;
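/* Editor's example for the function above: with the default
   cache-line-oriented setting and a 64-byte prefetch block, a static
   aggregate of 64 bytes or more has its alignment raised to 512 bits,
   and on x86-64 an aggregate of at least 16 bytes gets at least 128-bit
   alignment, per the psABI rule quoted in ix86_local_alignment below.  */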
27318 /* Compute the alignment for a local variable or a stack slot. EXP is
27319 the data type or decl itself, MODE is the widest mode available and
27320 ALIGN is the alignment that the object would ordinarily have. The
27321 value of this macro is used instead of that alignment to align the
27322 object. */
27324 unsigned int
27325 ix86_local_alignment (tree exp, machine_mode mode,
27326 unsigned int align)
27328 tree type, decl;
27330 if (exp && DECL_P (exp))
27332 type = TREE_TYPE (exp);
27333 decl = exp;
27335 else
27337 type = exp;
27338 decl = NULL;
27341 /* Don't do dynamic stack realignment for long long objects with
27342 -mpreferred-stack-boundary=2. */
27343 if (!TARGET_64BIT
27344 && align == 64
27345 && ix86_preferred_stack_boundary < 64
27346 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27347 && (!type || !TYPE_USER_ALIGN (type))
27348 && (!decl || !DECL_USER_ALIGN (decl)))
27349 align = 32;
27351 /* If TYPE is NULL, we are allocating a stack slot for caller-save
27352 register in MODE. We will return the largest alignment of XF
27353 and DF. */
27354 if (!type)
27356 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27357 align = GET_MODE_ALIGNMENT (DFmode);
27358 return align;
27361 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27362 to a 16-byte boundary. The exact wording is:
27364 An array uses the same alignment as its elements, except that a local or
27365 global array variable of length at least 16 bytes or
27366 a C99 variable-length array variable always has alignment of at least 16 bytes.
27368 This was added to allow the use of aligned SSE instructions on arrays. This
27369 rule is meant for static storage (where the compiler cannot do the analysis
27370 by itself). We follow it for automatic variables only when convenient.
27371 We fully control everything in the function being compiled, and functions
27372 from other units cannot rely on the alignment.
27374 Exclude the va_list type. It is the common case of a local array where
27375 we cannot benefit from the alignment.
27377 TODO: Probably one should optimize for size only when the variable is not escaping. */
27378 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27379 && TARGET_SSE)
27381 if (AGGREGATE_TYPE_P (type)
27382 && (va_list_type_node == NULL_TREE
27383 || (TYPE_MAIN_VARIANT (type)
27384 != TYPE_MAIN_VARIANT (va_list_type_node)))
27385 && TYPE_SIZE (type)
27386 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27387 && wi::geu_p (TYPE_SIZE (type), 16)
27388 && align < 128)
27389 return 128;
27391 if (TREE_CODE (type) == ARRAY_TYPE)
27393 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27394 return 64;
27395 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27396 return 128;
27398 else if (TREE_CODE (type) == COMPLEX_TYPE)
27400 if (TYPE_MODE (type) == DCmode && align < 64)
27401 return 64;
27402 if ((TYPE_MODE (type) == XCmode
27403 || TYPE_MODE (type) == TCmode) && align < 128)
27404 return 128;
27406 else if ((TREE_CODE (type) == RECORD_TYPE
27407 || TREE_CODE (type) == UNION_TYPE
27408 || TREE_CODE (type) == QUAL_UNION_TYPE)
27409 && TYPE_FIELDS (type))
27411 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27412 return 64;
27413 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27414 return 128;
27416 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27417 || TREE_CODE (type) == INTEGER_TYPE)
27420 if (TYPE_MODE (type) == DFmode && align < 64)
27421 return 64;
27422 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27423 return 128;
27425 return align;
27428 /* Compute the minimum required alignment for dynamic stack realignment
27429 purposes for a local variable, parameter or a stack slot. EXP is
27430 the data type or decl itself, MODE is its mode and ALIGN is the
27431 alignment that the object would ordinarily have. */
27433 unsigned int
27434 ix86_minimum_alignment (tree exp, machine_mode mode,
27435 unsigned int align)
27437 tree type, decl;
27439 if (exp && DECL_P (exp))
27441 type = TREE_TYPE (exp);
27442 decl = exp;
27444 else
27446 type = exp;
27447 decl = NULL;
27450 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27451 return align;
27453 /* Don't do dynamic stack realignment for long long objects with
27454 -mpreferred-stack-boundary=2. */
27455 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27456 && (!type || !TYPE_USER_ALIGN (type))
27457 && (!decl || !DECL_USER_ALIGN (decl)))
27458 return 32;
27460 return align;
27463 /* Find a location for the static chain incoming to a nested function.
27464 This is a register, unless all free registers are used by arguments. */
27466 static rtx
27467 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27469 unsigned regno;
27471 /* While this function won't be called by the middle-end when a static
27472 chain isn't needed, it's also used throughout the backend so it's
27473 easiest to keep this check centralized. */
27474 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27475 return NULL;
27477 if (TARGET_64BIT)
27479 /* We always use R10 in 64-bit mode. */
27480 regno = R10_REG;
27482 else
27484 const_tree fntype, fndecl;
27485 unsigned int ccvt;
27487 /* By default in 32-bit mode we use ECX to pass the static chain. */
27488 regno = CX_REG;
27490 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27492 fntype = TREE_TYPE (fndecl_or_type);
27493 fndecl = fndecl_or_type;
27495 else
27497 fntype = fndecl_or_type;
27498 fndecl = NULL;
27501 ccvt = ix86_get_callcvt (fntype);
27502 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27504 /* Fastcall functions use ecx/edx for arguments, which leaves
27505 us with EAX for the static chain.
27506 Thiscall functions use ecx for arguments, which also
27507 leaves us with EAX for the static chain. */
27508 regno = AX_REG;
27510 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27512 /* Thiscall functions use ecx for arguments, which leaves
27513 us with EAX and EDX for the static chain.
27514 We use EAX for ABI compatibility. */
27515 regno = AX_REG;
27517 else if (ix86_function_regparm (fntype, fndecl) == 3)
27519 /* For regparm 3, we have no free call-clobbered registers in
27520 which to store the static chain. In order to implement this,
27521 we have the trampoline push the static chain to the stack.
27522 However, we can't push a value below the return address when
27523 we call the nested function directly, so we have to use an
27524 alternate entry point. For this we use ESI, and have the
27525 alternate entry point push ESI, so that things appear the
27526 same once we're executing the nested function. */
27527 if (incoming_p)
27529 if (fndecl == current_function_decl)
27530 ix86_static_chain_on_stack = true;
27531 return gen_frame_mem (SImode,
27532 plus_constant (Pmode,
27533 arg_pointer_rtx, -8));
27535 regno = SI_REG;
27539 return gen_rtx_REG (Pmode, regno);
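/* Editor's summary of the choices above: R10 in 64-bit mode; ECX for the
   default 32-bit conventions; EAX for fastcall and thiscall; and for
   regparm(3) the incoming chain is read from a stack slot while ESI is
   used at the alternate entry point.  */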
27542 /* Emit RTL insns to initialize the variable parts of a trampoline.
27543 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27544 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27545 to be passed to the target function. */
27547 static void
27548 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27550 rtx mem, fnaddr;
27551 int opcode;
27552 int offset = 0;
27554 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27556 if (TARGET_64BIT)
27558 int size;
27560 /* Load the function address into r11. Try to load the address using
27561 the shorter movl instead of movabs. We may want to support
27562 movq for kernel mode, but the kernel does not use trampolines at
27563 the moment. FNADDR is a 32-bit address and may not be in
27564 DImode when ptr_mode == SImode. Always use movl in this
27565 case. */
27566 if (ptr_mode == SImode
27567 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27569 fnaddr = copy_addr_to_reg (fnaddr);
27571 mem = adjust_address (m_tramp, HImode, offset);
27572 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27574 mem = adjust_address (m_tramp, SImode, offset + 2);
27575 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27576 offset += 6;
27578 else
27580 mem = adjust_address (m_tramp, HImode, offset);
27581 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27583 mem = adjust_address (m_tramp, DImode, offset + 2);
27584 emit_move_insn (mem, fnaddr);
27585 offset += 10;
27588 /* Load the static chain into r10 using movabs. Use the shorter movl
27589 instead of movabs when ptr_mode == SImode. */
27590 if (ptr_mode == SImode)
27592 opcode = 0xba41;
27593 size = 6;
27595 else
27597 opcode = 0xba49;
27598 size = 10;
27601 mem = adjust_address (m_tramp, HImode, offset);
27602 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27604 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27605 emit_move_insn (mem, chain_value);
27606 offset += size;
27608 /* Jump to r11; the last (unused) byte is a nop, only there to
27609 pad the write out to a single 32-bit store. */
27610 mem = adjust_address (m_tramp, SImode, offset);
27611 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27612 offset += 4;
27614 else
27616 rtx disp, chain;
27618 /* Depending on the static chain location, either load a register
27619 with a constant, or push the constant to the stack. All of the
27620 instructions are the same size. */
27621 chain = ix86_static_chain (fndecl, true);
27622 if (REG_P (chain))
27624 switch (REGNO (chain))
27626 case AX_REG:
27627 opcode = 0xb8; break;
27628 case CX_REG:
27629 opcode = 0xb9; break;
27630 default:
27631 gcc_unreachable ();
27634 else
27635 opcode = 0x68;
27637 mem = adjust_address (m_tramp, QImode, offset);
27638 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27640 mem = adjust_address (m_tramp, SImode, offset + 1);
27641 emit_move_insn (mem, chain_value);
27642 offset += 5;
27644 mem = adjust_address (m_tramp, QImode, offset);
27645 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27647 mem = adjust_address (m_tramp, SImode, offset + 1);
27649 /* Compute offset from the end of the jmp to the target function.
27650 In the case in which the trampoline stores the static chain on
27651 the stack, we need to skip the first insn which pushes the
27652 (call-saved) register static chain; this push is 1 byte. */
27653 offset += 5;
27654 disp = expand_binop (SImode, sub_optab, fnaddr,
27655 plus_constant (Pmode, XEXP (m_tramp, 0),
27656 offset - (MEM_P (chain) ? 1 : 0)),
27657 NULL_RTX, 1, OPTAB_DIRECT);
27658 emit_move_insn (mem, disp);
27661 gcc_assert (offset <= TRAMPOLINE_SIZE);
27663 #ifdef HAVE_ENABLE_EXECUTE_STACK
27664 #ifdef CHECK_EXECUTE_STACK_ENABLED
27665 if (CHECK_EXECUTE_STACK_ENABLED)
27666 #endif
27667 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27668 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27669 #endif
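/* Editor's annotation of the 64-bit trampoline bytes emitted above
   (movabs form, little-endian; listed in the order the stores occur):
     49 bb <8-byte fnaddr>    movabs $fnaddr, %r11
     49 ba <8-byte chain>     movabs $chain,  %r10
     49 ff e3                 jmp    *%r11
     90                       nop (padding to a 32-bit store)
   When the 32-bit movl forms are selected, the prefixes 41 bb / 41 ba are
   used with 4-byte immediates instead.  */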
27672 /* The following file contains several enumerations and data structures
27673 built from the definitions in i386-builtin-types.def. */
27675 #include "i386-builtin-types.inc"
27677 /* Table for the ix86 builtin non-function types. */
27678 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27680 /* Retrieve an element from the above table, building some of
27681 the types lazily. */
27683 static tree
27684 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27686 unsigned int index;
27687 tree type, itype;
27689 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27691 type = ix86_builtin_type_tab[(int) tcode];
27692 if (type != NULL)
27693 return type;
27695 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27696 if (tcode <= IX86_BT_LAST_VECT)
27698 machine_mode mode;
27700 index = tcode - IX86_BT_LAST_PRIM - 1;
27701 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27702 mode = ix86_builtin_type_vect_mode[index];
27704 type = build_vector_type_for_mode (itype, mode);
27706 else
27708 int quals;
27710 index = tcode - IX86_BT_LAST_VECT - 1;
27711 if (tcode <= IX86_BT_LAST_PTR)
27712 quals = TYPE_UNQUALIFIED;
27713 else
27714 quals = TYPE_QUAL_CONST;
27716 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27717 if (quals != TYPE_UNQUALIFIED)
27718 itype = build_qualified_type (itype, quals);
27720 type = build_pointer_type (itype);
27723 ix86_builtin_type_tab[(int) tcode] = type;
27724 return type;
27727 /* Table for the ix86 builtin function types. */
27728 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27730 /* Retrieve an element from the above table, building some of
27731 the types lazily. */
27733 static tree
27734 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27736 tree type;
27738 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27740 type = ix86_builtin_func_type_tab[(int) tcode];
27741 if (type != NULL)
27742 return type;
27744 if (tcode <= IX86_BT_LAST_FUNC)
27746 unsigned start = ix86_builtin_func_start[(int) tcode];
27747 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27748 tree rtype, atype, args = void_list_node;
27749 unsigned i;
27751 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27752 for (i = after - 1; i > start; --i)
27754 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27755 args = tree_cons (NULL, atype, args);
27758 type = build_function_type (rtype, args);
27760 else
27762 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27763 enum ix86_builtin_func_type icode;
27765 icode = ix86_builtin_func_alias_base[index];
27766 type = ix86_get_builtin_func_type (icode);
27769 ix86_builtin_func_type_tab[(int) tcode] = type;
27770 return type;
27774 /* Codes for all the SSE/MMX builtins. */
27775 enum ix86_builtins
27777 IX86_BUILTIN_ADDPS,
27778 IX86_BUILTIN_ADDSS,
27779 IX86_BUILTIN_DIVPS,
27780 IX86_BUILTIN_DIVSS,
27781 IX86_BUILTIN_MULPS,
27782 IX86_BUILTIN_MULSS,
27783 IX86_BUILTIN_SUBPS,
27784 IX86_BUILTIN_SUBSS,
27786 IX86_BUILTIN_CMPEQPS,
27787 IX86_BUILTIN_CMPLTPS,
27788 IX86_BUILTIN_CMPLEPS,
27789 IX86_BUILTIN_CMPGTPS,
27790 IX86_BUILTIN_CMPGEPS,
27791 IX86_BUILTIN_CMPNEQPS,
27792 IX86_BUILTIN_CMPNLTPS,
27793 IX86_BUILTIN_CMPNLEPS,
27794 IX86_BUILTIN_CMPNGTPS,
27795 IX86_BUILTIN_CMPNGEPS,
27796 IX86_BUILTIN_CMPORDPS,
27797 IX86_BUILTIN_CMPUNORDPS,
27798 IX86_BUILTIN_CMPEQSS,
27799 IX86_BUILTIN_CMPLTSS,
27800 IX86_BUILTIN_CMPLESS,
27801 IX86_BUILTIN_CMPNEQSS,
27802 IX86_BUILTIN_CMPNLTSS,
27803 IX86_BUILTIN_CMPNLESS,
27804 IX86_BUILTIN_CMPORDSS,
27805 IX86_BUILTIN_CMPUNORDSS,
27807 IX86_BUILTIN_COMIEQSS,
27808 IX86_BUILTIN_COMILTSS,
27809 IX86_BUILTIN_COMILESS,
27810 IX86_BUILTIN_COMIGTSS,
27811 IX86_BUILTIN_COMIGESS,
27812 IX86_BUILTIN_COMINEQSS,
27813 IX86_BUILTIN_UCOMIEQSS,
27814 IX86_BUILTIN_UCOMILTSS,
27815 IX86_BUILTIN_UCOMILESS,
27816 IX86_BUILTIN_UCOMIGTSS,
27817 IX86_BUILTIN_UCOMIGESS,
27818 IX86_BUILTIN_UCOMINEQSS,
27820 IX86_BUILTIN_CVTPI2PS,
27821 IX86_BUILTIN_CVTPS2PI,
27822 IX86_BUILTIN_CVTSI2SS,
27823 IX86_BUILTIN_CVTSI642SS,
27824 IX86_BUILTIN_CVTSS2SI,
27825 IX86_BUILTIN_CVTSS2SI64,
27826 IX86_BUILTIN_CVTTPS2PI,
27827 IX86_BUILTIN_CVTTSS2SI,
27828 IX86_BUILTIN_CVTTSS2SI64,
27830 IX86_BUILTIN_MAXPS,
27831 IX86_BUILTIN_MAXSS,
27832 IX86_BUILTIN_MINPS,
27833 IX86_BUILTIN_MINSS,
27835 IX86_BUILTIN_LOADUPS,
27836 IX86_BUILTIN_STOREUPS,
27837 IX86_BUILTIN_MOVSS,
27839 IX86_BUILTIN_MOVHLPS,
27840 IX86_BUILTIN_MOVLHPS,
27841 IX86_BUILTIN_LOADHPS,
27842 IX86_BUILTIN_LOADLPS,
27843 IX86_BUILTIN_STOREHPS,
27844 IX86_BUILTIN_STORELPS,
27846 IX86_BUILTIN_MASKMOVQ,
27847 IX86_BUILTIN_MOVMSKPS,
27848 IX86_BUILTIN_PMOVMSKB,
27850 IX86_BUILTIN_MOVNTPS,
27851 IX86_BUILTIN_MOVNTQ,
27853 IX86_BUILTIN_LOADDQU,
27854 IX86_BUILTIN_STOREDQU,
27856 IX86_BUILTIN_PACKSSWB,
27857 IX86_BUILTIN_PACKSSDW,
27858 IX86_BUILTIN_PACKUSWB,
27860 IX86_BUILTIN_PADDB,
27861 IX86_BUILTIN_PADDW,
27862 IX86_BUILTIN_PADDD,
27863 IX86_BUILTIN_PADDQ,
27864 IX86_BUILTIN_PADDSB,
27865 IX86_BUILTIN_PADDSW,
27866 IX86_BUILTIN_PADDUSB,
27867 IX86_BUILTIN_PADDUSW,
27868 IX86_BUILTIN_PSUBB,
27869 IX86_BUILTIN_PSUBW,
27870 IX86_BUILTIN_PSUBD,
27871 IX86_BUILTIN_PSUBQ,
27872 IX86_BUILTIN_PSUBSB,
27873 IX86_BUILTIN_PSUBSW,
27874 IX86_BUILTIN_PSUBUSB,
27875 IX86_BUILTIN_PSUBUSW,
27877 IX86_BUILTIN_PAND,
27878 IX86_BUILTIN_PANDN,
27879 IX86_BUILTIN_POR,
27880 IX86_BUILTIN_PXOR,
27882 IX86_BUILTIN_PAVGB,
27883 IX86_BUILTIN_PAVGW,
27885 IX86_BUILTIN_PCMPEQB,
27886 IX86_BUILTIN_PCMPEQW,
27887 IX86_BUILTIN_PCMPEQD,
27888 IX86_BUILTIN_PCMPGTB,
27889 IX86_BUILTIN_PCMPGTW,
27890 IX86_BUILTIN_PCMPGTD,
27892 IX86_BUILTIN_PMADDWD,
27894 IX86_BUILTIN_PMAXSW,
27895 IX86_BUILTIN_PMAXUB,
27896 IX86_BUILTIN_PMINSW,
27897 IX86_BUILTIN_PMINUB,
27899 IX86_BUILTIN_PMULHUW,
27900 IX86_BUILTIN_PMULHW,
27901 IX86_BUILTIN_PMULLW,
27903 IX86_BUILTIN_PSADBW,
27904 IX86_BUILTIN_PSHUFW,
27906 IX86_BUILTIN_PSLLW,
27907 IX86_BUILTIN_PSLLD,
27908 IX86_BUILTIN_PSLLQ,
27909 IX86_BUILTIN_PSRAW,
27910 IX86_BUILTIN_PSRAD,
27911 IX86_BUILTIN_PSRLW,
27912 IX86_BUILTIN_PSRLD,
27913 IX86_BUILTIN_PSRLQ,
27914 IX86_BUILTIN_PSLLWI,
27915 IX86_BUILTIN_PSLLDI,
27916 IX86_BUILTIN_PSLLQI,
27917 IX86_BUILTIN_PSRAWI,
27918 IX86_BUILTIN_PSRADI,
27919 IX86_BUILTIN_PSRLWI,
27920 IX86_BUILTIN_PSRLDI,
27921 IX86_BUILTIN_PSRLQI,
27923 IX86_BUILTIN_PUNPCKHBW,
27924 IX86_BUILTIN_PUNPCKHWD,
27925 IX86_BUILTIN_PUNPCKHDQ,
27926 IX86_BUILTIN_PUNPCKLBW,
27927 IX86_BUILTIN_PUNPCKLWD,
27928 IX86_BUILTIN_PUNPCKLDQ,
27930 IX86_BUILTIN_SHUFPS,
27932 IX86_BUILTIN_RCPPS,
27933 IX86_BUILTIN_RCPSS,
27934 IX86_BUILTIN_RSQRTPS,
27935 IX86_BUILTIN_RSQRTPS_NR,
27936 IX86_BUILTIN_RSQRTSS,
27937 IX86_BUILTIN_RSQRTF,
27938 IX86_BUILTIN_SQRTPS,
27939 IX86_BUILTIN_SQRTPS_NR,
27940 IX86_BUILTIN_SQRTSS,
27942 IX86_BUILTIN_UNPCKHPS,
27943 IX86_BUILTIN_UNPCKLPS,
27945 IX86_BUILTIN_ANDPS,
27946 IX86_BUILTIN_ANDNPS,
27947 IX86_BUILTIN_ORPS,
27948 IX86_BUILTIN_XORPS,
27950 IX86_BUILTIN_EMMS,
27951 IX86_BUILTIN_LDMXCSR,
27952 IX86_BUILTIN_STMXCSR,
27953 IX86_BUILTIN_SFENCE,
27955 IX86_BUILTIN_FXSAVE,
27956 IX86_BUILTIN_FXRSTOR,
27957 IX86_BUILTIN_FXSAVE64,
27958 IX86_BUILTIN_FXRSTOR64,
27960 IX86_BUILTIN_XSAVE,
27961 IX86_BUILTIN_XRSTOR,
27962 IX86_BUILTIN_XSAVE64,
27963 IX86_BUILTIN_XRSTOR64,
27965 IX86_BUILTIN_XSAVEOPT,
27966 IX86_BUILTIN_XSAVEOPT64,
27968 IX86_BUILTIN_XSAVEC,
27969 IX86_BUILTIN_XSAVEC64,
27971 IX86_BUILTIN_XSAVES,
27972 IX86_BUILTIN_XRSTORS,
27973 IX86_BUILTIN_XSAVES64,
27974 IX86_BUILTIN_XRSTORS64,
27976 /* 3DNow! Original */
27977 IX86_BUILTIN_FEMMS,
27978 IX86_BUILTIN_PAVGUSB,
27979 IX86_BUILTIN_PF2ID,
27980 IX86_BUILTIN_PFACC,
27981 IX86_BUILTIN_PFADD,
27982 IX86_BUILTIN_PFCMPEQ,
27983 IX86_BUILTIN_PFCMPGE,
27984 IX86_BUILTIN_PFCMPGT,
27985 IX86_BUILTIN_PFMAX,
27986 IX86_BUILTIN_PFMIN,
27987 IX86_BUILTIN_PFMUL,
27988 IX86_BUILTIN_PFRCP,
27989 IX86_BUILTIN_PFRCPIT1,
27990 IX86_BUILTIN_PFRCPIT2,
27991 IX86_BUILTIN_PFRSQIT1,
27992 IX86_BUILTIN_PFRSQRT,
27993 IX86_BUILTIN_PFSUB,
27994 IX86_BUILTIN_PFSUBR,
27995 IX86_BUILTIN_PI2FD,
27996 IX86_BUILTIN_PMULHRW,
27998 /* 3DNow! Athlon Extensions */
27999 IX86_BUILTIN_PF2IW,
28000 IX86_BUILTIN_PFNACC,
28001 IX86_BUILTIN_PFPNACC,
28002 IX86_BUILTIN_PI2FW,
28003 IX86_BUILTIN_PSWAPDSI,
28004 IX86_BUILTIN_PSWAPDSF,
28006 /* SSE2 */
28007 IX86_BUILTIN_ADDPD,
28008 IX86_BUILTIN_ADDSD,
28009 IX86_BUILTIN_DIVPD,
28010 IX86_BUILTIN_DIVSD,
28011 IX86_BUILTIN_MULPD,
28012 IX86_BUILTIN_MULSD,
28013 IX86_BUILTIN_SUBPD,
28014 IX86_BUILTIN_SUBSD,
28016 IX86_BUILTIN_CMPEQPD,
28017 IX86_BUILTIN_CMPLTPD,
28018 IX86_BUILTIN_CMPLEPD,
28019 IX86_BUILTIN_CMPGTPD,
28020 IX86_BUILTIN_CMPGEPD,
28021 IX86_BUILTIN_CMPNEQPD,
28022 IX86_BUILTIN_CMPNLTPD,
28023 IX86_BUILTIN_CMPNLEPD,
28024 IX86_BUILTIN_CMPNGTPD,
28025 IX86_BUILTIN_CMPNGEPD,
28026 IX86_BUILTIN_CMPORDPD,
28027 IX86_BUILTIN_CMPUNORDPD,
28028 IX86_BUILTIN_CMPEQSD,
28029 IX86_BUILTIN_CMPLTSD,
28030 IX86_BUILTIN_CMPLESD,
28031 IX86_BUILTIN_CMPNEQSD,
28032 IX86_BUILTIN_CMPNLTSD,
28033 IX86_BUILTIN_CMPNLESD,
28034 IX86_BUILTIN_CMPORDSD,
28035 IX86_BUILTIN_CMPUNORDSD,
28037 IX86_BUILTIN_COMIEQSD,
28038 IX86_BUILTIN_COMILTSD,
28039 IX86_BUILTIN_COMILESD,
28040 IX86_BUILTIN_COMIGTSD,
28041 IX86_BUILTIN_COMIGESD,
28042 IX86_BUILTIN_COMINEQSD,
28043 IX86_BUILTIN_UCOMIEQSD,
28044 IX86_BUILTIN_UCOMILTSD,
28045 IX86_BUILTIN_UCOMILESD,
28046 IX86_BUILTIN_UCOMIGTSD,
28047 IX86_BUILTIN_UCOMIGESD,
28048 IX86_BUILTIN_UCOMINEQSD,
28050 IX86_BUILTIN_MAXPD,
28051 IX86_BUILTIN_MAXSD,
28052 IX86_BUILTIN_MINPD,
28053 IX86_BUILTIN_MINSD,
28055 IX86_BUILTIN_ANDPD,
28056 IX86_BUILTIN_ANDNPD,
28057 IX86_BUILTIN_ORPD,
28058 IX86_BUILTIN_XORPD,
28060 IX86_BUILTIN_SQRTPD,
28061 IX86_BUILTIN_SQRTSD,
28063 IX86_BUILTIN_UNPCKHPD,
28064 IX86_BUILTIN_UNPCKLPD,
28066 IX86_BUILTIN_SHUFPD,
28068 IX86_BUILTIN_LOADUPD,
28069 IX86_BUILTIN_STOREUPD,
28070 IX86_BUILTIN_MOVSD,
28072 IX86_BUILTIN_LOADHPD,
28073 IX86_BUILTIN_LOADLPD,
28075 IX86_BUILTIN_CVTDQ2PD,
28076 IX86_BUILTIN_CVTDQ2PS,
28078 IX86_BUILTIN_CVTPD2DQ,
28079 IX86_BUILTIN_CVTPD2PI,
28080 IX86_BUILTIN_CVTPD2PS,
28081 IX86_BUILTIN_CVTTPD2DQ,
28082 IX86_BUILTIN_CVTTPD2PI,
28084 IX86_BUILTIN_CVTPI2PD,
28085 IX86_BUILTIN_CVTSI2SD,
28086 IX86_BUILTIN_CVTSI642SD,
28088 IX86_BUILTIN_CVTSD2SI,
28089 IX86_BUILTIN_CVTSD2SI64,
28090 IX86_BUILTIN_CVTSD2SS,
28091 IX86_BUILTIN_CVTSS2SD,
28092 IX86_BUILTIN_CVTTSD2SI,
28093 IX86_BUILTIN_CVTTSD2SI64,
28095 IX86_BUILTIN_CVTPS2DQ,
28096 IX86_BUILTIN_CVTPS2PD,
28097 IX86_BUILTIN_CVTTPS2DQ,
28099 IX86_BUILTIN_MOVNTI,
28100 IX86_BUILTIN_MOVNTI64,
28101 IX86_BUILTIN_MOVNTPD,
28102 IX86_BUILTIN_MOVNTDQ,
28104 IX86_BUILTIN_MOVQ128,
28106 /* SSE2 MMX */
28107 IX86_BUILTIN_MASKMOVDQU,
28108 IX86_BUILTIN_MOVMSKPD,
28109 IX86_BUILTIN_PMOVMSKB128,
28111 IX86_BUILTIN_PACKSSWB128,
28112 IX86_BUILTIN_PACKSSDW128,
28113 IX86_BUILTIN_PACKUSWB128,
28115 IX86_BUILTIN_PADDB128,
28116 IX86_BUILTIN_PADDW128,
28117 IX86_BUILTIN_PADDD128,
28118 IX86_BUILTIN_PADDQ128,
28119 IX86_BUILTIN_PADDSB128,
28120 IX86_BUILTIN_PADDSW128,
28121 IX86_BUILTIN_PADDUSB128,
28122 IX86_BUILTIN_PADDUSW128,
28123 IX86_BUILTIN_PSUBB128,
28124 IX86_BUILTIN_PSUBW128,
28125 IX86_BUILTIN_PSUBD128,
28126 IX86_BUILTIN_PSUBQ128,
28127 IX86_BUILTIN_PSUBSB128,
28128 IX86_BUILTIN_PSUBSW128,
28129 IX86_BUILTIN_PSUBUSB128,
28130 IX86_BUILTIN_PSUBUSW128,
28132 IX86_BUILTIN_PAND128,
28133 IX86_BUILTIN_PANDN128,
28134 IX86_BUILTIN_POR128,
28135 IX86_BUILTIN_PXOR128,
28137 IX86_BUILTIN_PAVGB128,
28138 IX86_BUILTIN_PAVGW128,
28140 IX86_BUILTIN_PCMPEQB128,
28141 IX86_BUILTIN_PCMPEQW128,
28142 IX86_BUILTIN_PCMPEQD128,
28143 IX86_BUILTIN_PCMPGTB128,
28144 IX86_BUILTIN_PCMPGTW128,
28145 IX86_BUILTIN_PCMPGTD128,
28147 IX86_BUILTIN_PMADDWD128,
28149 IX86_BUILTIN_PMAXSW128,
28150 IX86_BUILTIN_PMAXUB128,
28151 IX86_BUILTIN_PMINSW128,
28152 IX86_BUILTIN_PMINUB128,
28154 IX86_BUILTIN_PMULUDQ,
28155 IX86_BUILTIN_PMULUDQ128,
28156 IX86_BUILTIN_PMULHUW128,
28157 IX86_BUILTIN_PMULHW128,
28158 IX86_BUILTIN_PMULLW128,
28160 IX86_BUILTIN_PSADBW128,
28161 IX86_BUILTIN_PSHUFHW,
28162 IX86_BUILTIN_PSHUFLW,
28163 IX86_BUILTIN_PSHUFD,
28165 IX86_BUILTIN_PSLLDQI128,
28166 IX86_BUILTIN_PSLLWI128,
28167 IX86_BUILTIN_PSLLDI128,
28168 IX86_BUILTIN_PSLLQI128,
28169 IX86_BUILTIN_PSRAWI128,
28170 IX86_BUILTIN_PSRADI128,
28171 IX86_BUILTIN_PSRLDQI128,
28172 IX86_BUILTIN_PSRLWI128,
28173 IX86_BUILTIN_PSRLDI128,
28174 IX86_BUILTIN_PSRLQI128,
28176 IX86_BUILTIN_PSLLDQ128,
28177 IX86_BUILTIN_PSLLW128,
28178 IX86_BUILTIN_PSLLD128,
28179 IX86_BUILTIN_PSLLQ128,
28180 IX86_BUILTIN_PSRAW128,
28181 IX86_BUILTIN_PSRAD128,
28182 IX86_BUILTIN_PSRLW128,
28183 IX86_BUILTIN_PSRLD128,
28184 IX86_BUILTIN_PSRLQ128,
28186 IX86_BUILTIN_PUNPCKHBW128,
28187 IX86_BUILTIN_PUNPCKHWD128,
28188 IX86_BUILTIN_PUNPCKHDQ128,
28189 IX86_BUILTIN_PUNPCKHQDQ128,
28190 IX86_BUILTIN_PUNPCKLBW128,
28191 IX86_BUILTIN_PUNPCKLWD128,
28192 IX86_BUILTIN_PUNPCKLDQ128,
28193 IX86_BUILTIN_PUNPCKLQDQ128,
28195 IX86_BUILTIN_CLFLUSH,
28196 IX86_BUILTIN_MFENCE,
28197 IX86_BUILTIN_LFENCE,
28198 IX86_BUILTIN_PAUSE,
28200 IX86_BUILTIN_FNSTENV,
28201 IX86_BUILTIN_FLDENV,
28202 IX86_BUILTIN_FNSTSW,
28203 IX86_BUILTIN_FNCLEX,
28205 IX86_BUILTIN_BSRSI,
28206 IX86_BUILTIN_BSRDI,
28207 IX86_BUILTIN_RDPMC,
28208 IX86_BUILTIN_RDTSC,
28209 IX86_BUILTIN_RDTSCP,
28210 IX86_BUILTIN_ROLQI,
28211 IX86_BUILTIN_ROLHI,
28212 IX86_BUILTIN_RORQI,
28213 IX86_BUILTIN_RORHI,
28215 /* SSE3. */
28216 IX86_BUILTIN_ADDSUBPS,
28217 IX86_BUILTIN_HADDPS,
28218 IX86_BUILTIN_HSUBPS,
28219 IX86_BUILTIN_MOVSHDUP,
28220 IX86_BUILTIN_MOVSLDUP,
28221 IX86_BUILTIN_ADDSUBPD,
28222 IX86_BUILTIN_HADDPD,
28223 IX86_BUILTIN_HSUBPD,
28224 IX86_BUILTIN_LDDQU,
28226 IX86_BUILTIN_MONITOR,
28227 IX86_BUILTIN_MWAIT,
28229 /* SSSE3. */
28230 IX86_BUILTIN_PHADDW,
28231 IX86_BUILTIN_PHADDD,
28232 IX86_BUILTIN_PHADDSW,
28233 IX86_BUILTIN_PHSUBW,
28234 IX86_BUILTIN_PHSUBD,
28235 IX86_BUILTIN_PHSUBSW,
28236 IX86_BUILTIN_PMADDUBSW,
28237 IX86_BUILTIN_PMULHRSW,
28238 IX86_BUILTIN_PSHUFB,
28239 IX86_BUILTIN_PSIGNB,
28240 IX86_BUILTIN_PSIGNW,
28241 IX86_BUILTIN_PSIGND,
28242 IX86_BUILTIN_PALIGNR,
28243 IX86_BUILTIN_PABSB,
28244 IX86_BUILTIN_PABSW,
28245 IX86_BUILTIN_PABSD,
28247 IX86_BUILTIN_PHADDW128,
28248 IX86_BUILTIN_PHADDD128,
28249 IX86_BUILTIN_PHADDSW128,
28250 IX86_BUILTIN_PHSUBW128,
28251 IX86_BUILTIN_PHSUBD128,
28252 IX86_BUILTIN_PHSUBSW128,
28253 IX86_BUILTIN_PMADDUBSW128,
28254 IX86_BUILTIN_PMULHRSW128,
28255 IX86_BUILTIN_PSHUFB128,
28256 IX86_BUILTIN_PSIGNB128,
28257 IX86_BUILTIN_PSIGNW128,
28258 IX86_BUILTIN_PSIGND128,
28259 IX86_BUILTIN_PALIGNR128,
28260 IX86_BUILTIN_PABSB128,
28261 IX86_BUILTIN_PABSW128,
28262 IX86_BUILTIN_PABSD128,
28264 /* AMDFAM10 - SSE4A New Instructions. */
28265 IX86_BUILTIN_MOVNTSD,
28266 IX86_BUILTIN_MOVNTSS,
28267 IX86_BUILTIN_EXTRQI,
28268 IX86_BUILTIN_EXTRQ,
28269 IX86_BUILTIN_INSERTQI,
28270 IX86_BUILTIN_INSERTQ,
28272 /* SSE4.1. */
28273 IX86_BUILTIN_BLENDPD,
28274 IX86_BUILTIN_BLENDPS,
28275 IX86_BUILTIN_BLENDVPD,
28276 IX86_BUILTIN_BLENDVPS,
28277 IX86_BUILTIN_PBLENDVB128,
28278 IX86_BUILTIN_PBLENDW128,
28280 IX86_BUILTIN_DPPD,
28281 IX86_BUILTIN_DPPS,
28283 IX86_BUILTIN_INSERTPS128,
28285 IX86_BUILTIN_MOVNTDQA,
28286 IX86_BUILTIN_MPSADBW128,
28287 IX86_BUILTIN_PACKUSDW128,
28288 IX86_BUILTIN_PCMPEQQ,
28289 IX86_BUILTIN_PHMINPOSUW128,
28291 IX86_BUILTIN_PMAXSB128,
28292 IX86_BUILTIN_PMAXSD128,
28293 IX86_BUILTIN_PMAXUD128,
28294 IX86_BUILTIN_PMAXUW128,
28296 IX86_BUILTIN_PMINSB128,
28297 IX86_BUILTIN_PMINSD128,
28298 IX86_BUILTIN_PMINUD128,
28299 IX86_BUILTIN_PMINUW128,
28301 IX86_BUILTIN_PMOVSXBW128,
28302 IX86_BUILTIN_PMOVSXBD128,
28303 IX86_BUILTIN_PMOVSXBQ128,
28304 IX86_BUILTIN_PMOVSXWD128,
28305 IX86_BUILTIN_PMOVSXWQ128,
28306 IX86_BUILTIN_PMOVSXDQ128,
28308 IX86_BUILTIN_PMOVZXBW128,
28309 IX86_BUILTIN_PMOVZXBD128,
28310 IX86_BUILTIN_PMOVZXBQ128,
28311 IX86_BUILTIN_PMOVZXWD128,
28312 IX86_BUILTIN_PMOVZXWQ128,
28313 IX86_BUILTIN_PMOVZXDQ128,
28315 IX86_BUILTIN_PMULDQ128,
28316 IX86_BUILTIN_PMULLD128,
28318 IX86_BUILTIN_ROUNDSD,
28319 IX86_BUILTIN_ROUNDSS,
28321 IX86_BUILTIN_ROUNDPD,
28322 IX86_BUILTIN_ROUNDPS,
28324 IX86_BUILTIN_FLOORPD,
28325 IX86_BUILTIN_CEILPD,
28326 IX86_BUILTIN_TRUNCPD,
28327 IX86_BUILTIN_RINTPD,
28328 IX86_BUILTIN_ROUNDPD_AZ,
28330 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28331 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28332 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28334 IX86_BUILTIN_FLOORPS,
28335 IX86_BUILTIN_CEILPS,
28336 IX86_BUILTIN_TRUNCPS,
28337 IX86_BUILTIN_RINTPS,
28338 IX86_BUILTIN_ROUNDPS_AZ,
28340 IX86_BUILTIN_FLOORPS_SFIX,
28341 IX86_BUILTIN_CEILPS_SFIX,
28342 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28344 IX86_BUILTIN_PTESTZ,
28345 IX86_BUILTIN_PTESTC,
28346 IX86_BUILTIN_PTESTNZC,
28348 IX86_BUILTIN_VEC_INIT_V2SI,
28349 IX86_BUILTIN_VEC_INIT_V4HI,
28350 IX86_BUILTIN_VEC_INIT_V8QI,
28351 IX86_BUILTIN_VEC_EXT_V2DF,
28352 IX86_BUILTIN_VEC_EXT_V2DI,
28353 IX86_BUILTIN_VEC_EXT_V4SF,
28354 IX86_BUILTIN_VEC_EXT_V4SI,
28355 IX86_BUILTIN_VEC_EXT_V8HI,
28356 IX86_BUILTIN_VEC_EXT_V2SI,
28357 IX86_BUILTIN_VEC_EXT_V4HI,
28358 IX86_BUILTIN_VEC_EXT_V16QI,
28359 IX86_BUILTIN_VEC_SET_V2DI,
28360 IX86_BUILTIN_VEC_SET_V4SF,
28361 IX86_BUILTIN_VEC_SET_V4SI,
28362 IX86_BUILTIN_VEC_SET_V8HI,
28363 IX86_BUILTIN_VEC_SET_V4HI,
28364 IX86_BUILTIN_VEC_SET_V16QI,
28366 IX86_BUILTIN_VEC_PACK_SFIX,
28367 IX86_BUILTIN_VEC_PACK_SFIX256,
28369 /* SSE4.2. */
28370 IX86_BUILTIN_CRC32QI,
28371 IX86_BUILTIN_CRC32HI,
28372 IX86_BUILTIN_CRC32SI,
28373 IX86_BUILTIN_CRC32DI,
28375 IX86_BUILTIN_PCMPESTRI128,
28376 IX86_BUILTIN_PCMPESTRM128,
28377 IX86_BUILTIN_PCMPESTRA128,
28378 IX86_BUILTIN_PCMPESTRC128,
28379 IX86_BUILTIN_PCMPESTRO128,
28380 IX86_BUILTIN_PCMPESTRS128,
28381 IX86_BUILTIN_PCMPESTRZ128,
28382 IX86_BUILTIN_PCMPISTRI128,
28383 IX86_BUILTIN_PCMPISTRM128,
28384 IX86_BUILTIN_PCMPISTRA128,
28385 IX86_BUILTIN_PCMPISTRC128,
28386 IX86_BUILTIN_PCMPISTRO128,
28387 IX86_BUILTIN_PCMPISTRS128,
28388 IX86_BUILTIN_PCMPISTRZ128,
28390 IX86_BUILTIN_PCMPGTQ,
28392 /* AES instructions */
28393 IX86_BUILTIN_AESENC128,
28394 IX86_BUILTIN_AESENCLAST128,
28395 IX86_BUILTIN_AESDEC128,
28396 IX86_BUILTIN_AESDECLAST128,
28397 IX86_BUILTIN_AESIMC128,
28398 IX86_BUILTIN_AESKEYGENASSIST128,
28400 /* PCLMUL instruction */
28401 IX86_BUILTIN_PCLMULQDQ128,
28403 /* AVX */
28404 IX86_BUILTIN_ADDPD256,
28405 IX86_BUILTIN_ADDPS256,
28406 IX86_BUILTIN_ADDSUBPD256,
28407 IX86_BUILTIN_ADDSUBPS256,
28408 IX86_BUILTIN_ANDPD256,
28409 IX86_BUILTIN_ANDPS256,
28410 IX86_BUILTIN_ANDNPD256,
28411 IX86_BUILTIN_ANDNPS256,
28412 IX86_BUILTIN_BLENDPD256,
28413 IX86_BUILTIN_BLENDPS256,
28414 IX86_BUILTIN_BLENDVPD256,
28415 IX86_BUILTIN_BLENDVPS256,
28416 IX86_BUILTIN_DIVPD256,
28417 IX86_BUILTIN_DIVPS256,
28418 IX86_BUILTIN_DPPS256,
28419 IX86_BUILTIN_HADDPD256,
28420 IX86_BUILTIN_HADDPS256,
28421 IX86_BUILTIN_HSUBPD256,
28422 IX86_BUILTIN_HSUBPS256,
28423 IX86_BUILTIN_MAXPD256,
28424 IX86_BUILTIN_MAXPS256,
28425 IX86_BUILTIN_MINPD256,
28426 IX86_BUILTIN_MINPS256,
28427 IX86_BUILTIN_MULPD256,
28428 IX86_BUILTIN_MULPS256,
28429 IX86_BUILTIN_ORPD256,
28430 IX86_BUILTIN_ORPS256,
28431 IX86_BUILTIN_SHUFPD256,
28432 IX86_BUILTIN_SHUFPS256,
28433 IX86_BUILTIN_SUBPD256,
28434 IX86_BUILTIN_SUBPS256,
28435 IX86_BUILTIN_XORPD256,
28436 IX86_BUILTIN_XORPS256,
28437 IX86_BUILTIN_CMPSD,
28438 IX86_BUILTIN_CMPSS,
28439 IX86_BUILTIN_CMPPD,
28440 IX86_BUILTIN_CMPPS,
28441 IX86_BUILTIN_CMPPD256,
28442 IX86_BUILTIN_CMPPS256,
28443 IX86_BUILTIN_CVTDQ2PD256,
28444 IX86_BUILTIN_CVTDQ2PS256,
28445 IX86_BUILTIN_CVTPD2PS256,
28446 IX86_BUILTIN_CVTPS2DQ256,
28447 IX86_BUILTIN_CVTPS2PD256,
28448 IX86_BUILTIN_CVTTPD2DQ256,
28449 IX86_BUILTIN_CVTPD2DQ256,
28450 IX86_BUILTIN_CVTTPS2DQ256,
28451 IX86_BUILTIN_EXTRACTF128PD256,
28452 IX86_BUILTIN_EXTRACTF128PS256,
28453 IX86_BUILTIN_EXTRACTF128SI256,
28454 IX86_BUILTIN_VZEROALL,
28455 IX86_BUILTIN_VZEROUPPER,
28456 IX86_BUILTIN_VPERMILVARPD,
28457 IX86_BUILTIN_VPERMILVARPS,
28458 IX86_BUILTIN_VPERMILVARPD256,
28459 IX86_BUILTIN_VPERMILVARPS256,
28460 IX86_BUILTIN_VPERMILPD,
28461 IX86_BUILTIN_VPERMILPS,
28462 IX86_BUILTIN_VPERMILPD256,
28463 IX86_BUILTIN_VPERMILPS256,
28464 IX86_BUILTIN_VPERMIL2PD,
28465 IX86_BUILTIN_VPERMIL2PS,
28466 IX86_BUILTIN_VPERMIL2PD256,
28467 IX86_BUILTIN_VPERMIL2PS256,
28468 IX86_BUILTIN_VPERM2F128PD256,
28469 IX86_BUILTIN_VPERM2F128PS256,
28470 IX86_BUILTIN_VPERM2F128SI256,
28471 IX86_BUILTIN_VBROADCASTSS,
28472 IX86_BUILTIN_VBROADCASTSD256,
28473 IX86_BUILTIN_VBROADCASTSS256,
28474 IX86_BUILTIN_VBROADCASTPD256,
28475 IX86_BUILTIN_VBROADCASTPS256,
28476 IX86_BUILTIN_VINSERTF128PD256,
28477 IX86_BUILTIN_VINSERTF128PS256,
28478 IX86_BUILTIN_VINSERTF128SI256,
28479 IX86_BUILTIN_LOADUPD256,
28480 IX86_BUILTIN_LOADUPS256,
28481 IX86_BUILTIN_STOREUPD256,
28482 IX86_BUILTIN_STOREUPS256,
28483 IX86_BUILTIN_LDDQU256,
28484 IX86_BUILTIN_MOVNTDQ256,
28485 IX86_BUILTIN_MOVNTPD256,
28486 IX86_BUILTIN_MOVNTPS256,
28487 IX86_BUILTIN_LOADDQU256,
28488 IX86_BUILTIN_STOREDQU256,
28489 IX86_BUILTIN_MASKLOADPD,
28490 IX86_BUILTIN_MASKLOADPS,
28491 IX86_BUILTIN_MASKSTOREPD,
28492 IX86_BUILTIN_MASKSTOREPS,
28493 IX86_BUILTIN_MASKLOADPD256,
28494 IX86_BUILTIN_MASKLOADPS256,
28495 IX86_BUILTIN_MASKSTOREPD256,
28496 IX86_BUILTIN_MASKSTOREPS256,
28497 IX86_BUILTIN_MOVSHDUP256,
28498 IX86_BUILTIN_MOVSLDUP256,
28499 IX86_BUILTIN_MOVDDUP256,
28501 IX86_BUILTIN_SQRTPD256,
28502 IX86_BUILTIN_SQRTPS256,
28503 IX86_BUILTIN_SQRTPS_NR256,
28504 IX86_BUILTIN_RSQRTPS256,
28505 IX86_BUILTIN_RSQRTPS_NR256,
28507 IX86_BUILTIN_RCPPS256,
28509 IX86_BUILTIN_ROUNDPD256,
28510 IX86_BUILTIN_ROUNDPS256,
28512 IX86_BUILTIN_FLOORPD256,
28513 IX86_BUILTIN_CEILPD256,
28514 IX86_BUILTIN_TRUNCPD256,
28515 IX86_BUILTIN_RINTPD256,
28516 IX86_BUILTIN_ROUNDPD_AZ256,
28518 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28519 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28520 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28522 IX86_BUILTIN_FLOORPS256,
28523 IX86_BUILTIN_CEILPS256,
28524 IX86_BUILTIN_TRUNCPS256,
28525 IX86_BUILTIN_RINTPS256,
28526 IX86_BUILTIN_ROUNDPS_AZ256,
28528 IX86_BUILTIN_FLOORPS_SFIX256,
28529 IX86_BUILTIN_CEILPS_SFIX256,
28530 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28532 IX86_BUILTIN_UNPCKHPD256,
28533 IX86_BUILTIN_UNPCKLPD256,
28534 IX86_BUILTIN_UNPCKHPS256,
28535 IX86_BUILTIN_UNPCKLPS256,
28537 IX86_BUILTIN_SI256_SI,
28538 IX86_BUILTIN_PS256_PS,
28539 IX86_BUILTIN_PD256_PD,
28540 IX86_BUILTIN_SI_SI256,
28541 IX86_BUILTIN_PS_PS256,
28542 IX86_BUILTIN_PD_PD256,
28544 IX86_BUILTIN_VTESTZPD,
28545 IX86_BUILTIN_VTESTCPD,
28546 IX86_BUILTIN_VTESTNZCPD,
28547 IX86_BUILTIN_VTESTZPS,
28548 IX86_BUILTIN_VTESTCPS,
28549 IX86_BUILTIN_VTESTNZCPS,
28550 IX86_BUILTIN_VTESTZPD256,
28551 IX86_BUILTIN_VTESTCPD256,
28552 IX86_BUILTIN_VTESTNZCPD256,
28553 IX86_BUILTIN_VTESTZPS256,
28554 IX86_BUILTIN_VTESTCPS256,
28555 IX86_BUILTIN_VTESTNZCPS256,
28556 IX86_BUILTIN_PTESTZ256,
28557 IX86_BUILTIN_PTESTC256,
28558 IX86_BUILTIN_PTESTNZC256,
28560 IX86_BUILTIN_MOVMSKPD256,
28561 IX86_BUILTIN_MOVMSKPS256,
28563 /* AVX2 */
28564 IX86_BUILTIN_MPSADBW256,
28565 IX86_BUILTIN_PABSB256,
28566 IX86_BUILTIN_PABSW256,
28567 IX86_BUILTIN_PABSD256,
28568 IX86_BUILTIN_PACKSSDW256,
28569 IX86_BUILTIN_PACKSSWB256,
28570 IX86_BUILTIN_PACKUSDW256,
28571 IX86_BUILTIN_PACKUSWB256,
28572 IX86_BUILTIN_PADDB256,
28573 IX86_BUILTIN_PADDW256,
28574 IX86_BUILTIN_PADDD256,
28575 IX86_BUILTIN_PADDQ256,
28576 IX86_BUILTIN_PADDSB256,
28577 IX86_BUILTIN_PADDSW256,
28578 IX86_BUILTIN_PADDUSB256,
28579 IX86_BUILTIN_PADDUSW256,
28580 IX86_BUILTIN_PALIGNR256,
28581 IX86_BUILTIN_AND256I,
28582 IX86_BUILTIN_ANDNOT256I,
28583 IX86_BUILTIN_PAVGB256,
28584 IX86_BUILTIN_PAVGW256,
28585 IX86_BUILTIN_PBLENDVB256,
28586 IX86_BUILTIN_PBLENDVW256,
28587 IX86_BUILTIN_PCMPEQB256,
28588 IX86_BUILTIN_PCMPEQW256,
28589 IX86_BUILTIN_PCMPEQD256,
28590 IX86_BUILTIN_PCMPEQQ256,
28591 IX86_BUILTIN_PCMPGTB256,
28592 IX86_BUILTIN_PCMPGTW256,
28593 IX86_BUILTIN_PCMPGTD256,
28594 IX86_BUILTIN_PCMPGTQ256,
28595 IX86_BUILTIN_PHADDW256,
28596 IX86_BUILTIN_PHADDD256,
28597 IX86_BUILTIN_PHADDSW256,
28598 IX86_BUILTIN_PHSUBW256,
28599 IX86_BUILTIN_PHSUBD256,
28600 IX86_BUILTIN_PHSUBSW256,
28601 IX86_BUILTIN_PMADDUBSW256,
28602 IX86_BUILTIN_PMADDWD256,
28603 IX86_BUILTIN_PMAXSB256,
28604 IX86_BUILTIN_PMAXSW256,
28605 IX86_BUILTIN_PMAXSD256,
28606 IX86_BUILTIN_PMAXUB256,
28607 IX86_BUILTIN_PMAXUW256,
28608 IX86_BUILTIN_PMAXUD256,
28609 IX86_BUILTIN_PMINSB256,
28610 IX86_BUILTIN_PMINSW256,
28611 IX86_BUILTIN_PMINSD256,
28612 IX86_BUILTIN_PMINUB256,
28613 IX86_BUILTIN_PMINUW256,
28614 IX86_BUILTIN_PMINUD256,
28615 IX86_BUILTIN_PMOVMSKB256,
28616 IX86_BUILTIN_PMOVSXBW256,
28617 IX86_BUILTIN_PMOVSXBD256,
28618 IX86_BUILTIN_PMOVSXBQ256,
28619 IX86_BUILTIN_PMOVSXWD256,
28620 IX86_BUILTIN_PMOVSXWQ256,
28621 IX86_BUILTIN_PMOVSXDQ256,
28622 IX86_BUILTIN_PMOVZXBW256,
28623 IX86_BUILTIN_PMOVZXBD256,
28624 IX86_BUILTIN_PMOVZXBQ256,
28625 IX86_BUILTIN_PMOVZXWD256,
28626 IX86_BUILTIN_PMOVZXWQ256,
28627 IX86_BUILTIN_PMOVZXDQ256,
28628 IX86_BUILTIN_PMULDQ256,
28629 IX86_BUILTIN_PMULHRSW256,
28630 IX86_BUILTIN_PMULHUW256,
28631 IX86_BUILTIN_PMULHW256,
28632 IX86_BUILTIN_PMULLW256,
28633 IX86_BUILTIN_PMULLD256,
28634 IX86_BUILTIN_PMULUDQ256,
28635 IX86_BUILTIN_POR256,
28636 IX86_BUILTIN_PSADBW256,
28637 IX86_BUILTIN_PSHUFB256,
28638 IX86_BUILTIN_PSHUFD256,
28639 IX86_BUILTIN_PSHUFHW256,
28640 IX86_BUILTIN_PSHUFLW256,
28641 IX86_BUILTIN_PSIGNB256,
28642 IX86_BUILTIN_PSIGNW256,
28643 IX86_BUILTIN_PSIGND256,
28644 IX86_BUILTIN_PSLLDQI256,
28645 IX86_BUILTIN_PSLLWI256,
28646 IX86_BUILTIN_PSLLW256,
28647 IX86_BUILTIN_PSLLDI256,
28648 IX86_BUILTIN_PSLLD256,
28649 IX86_BUILTIN_PSLLQI256,
28650 IX86_BUILTIN_PSLLQ256,
28651 IX86_BUILTIN_PSRAWI256,
28652 IX86_BUILTIN_PSRAW256,
28653 IX86_BUILTIN_PSRADI256,
28654 IX86_BUILTIN_PSRAD256,
28655 IX86_BUILTIN_PSRLDQI256,
28656 IX86_BUILTIN_PSRLWI256,
28657 IX86_BUILTIN_PSRLW256,
28658 IX86_BUILTIN_PSRLDI256,
28659 IX86_BUILTIN_PSRLD256,
28660 IX86_BUILTIN_PSRLQI256,
28661 IX86_BUILTIN_PSRLQ256,
28662 IX86_BUILTIN_PSUBB256,
28663 IX86_BUILTIN_PSUBW256,
28664 IX86_BUILTIN_PSUBD256,
28665 IX86_BUILTIN_PSUBQ256,
28666 IX86_BUILTIN_PSUBSB256,
28667 IX86_BUILTIN_PSUBSW256,
28668 IX86_BUILTIN_PSUBUSB256,
28669 IX86_BUILTIN_PSUBUSW256,
28670 IX86_BUILTIN_PUNPCKHBW256,
28671 IX86_BUILTIN_PUNPCKHWD256,
28672 IX86_BUILTIN_PUNPCKHDQ256,
28673 IX86_BUILTIN_PUNPCKHQDQ256,
28674 IX86_BUILTIN_PUNPCKLBW256,
28675 IX86_BUILTIN_PUNPCKLWD256,
28676 IX86_BUILTIN_PUNPCKLDQ256,
28677 IX86_BUILTIN_PUNPCKLQDQ256,
28678 IX86_BUILTIN_PXOR256,
28679 IX86_BUILTIN_MOVNTDQA256,
28680 IX86_BUILTIN_VBROADCASTSS_PS,
28681 IX86_BUILTIN_VBROADCASTSS_PS256,
28682 IX86_BUILTIN_VBROADCASTSD_PD256,
28683 IX86_BUILTIN_VBROADCASTSI256,
28684 IX86_BUILTIN_PBLENDD256,
28685 IX86_BUILTIN_PBLENDD128,
28686 IX86_BUILTIN_PBROADCASTB256,
28687 IX86_BUILTIN_PBROADCASTW256,
28688 IX86_BUILTIN_PBROADCASTD256,
28689 IX86_BUILTIN_PBROADCASTQ256,
28690 IX86_BUILTIN_PBROADCASTB128,
28691 IX86_BUILTIN_PBROADCASTW128,
28692 IX86_BUILTIN_PBROADCASTD128,
28693 IX86_BUILTIN_PBROADCASTQ128,
28694 IX86_BUILTIN_VPERMVARSI256,
28695 IX86_BUILTIN_VPERMDF256,
28696 IX86_BUILTIN_VPERMVARSF256,
28697 IX86_BUILTIN_VPERMDI256,
28698 IX86_BUILTIN_VPERMTI256,
28699 IX86_BUILTIN_VEXTRACT128I256,
28700 IX86_BUILTIN_VINSERT128I256,
28701 IX86_BUILTIN_MASKLOADD,
28702 IX86_BUILTIN_MASKLOADQ,
28703 IX86_BUILTIN_MASKLOADD256,
28704 IX86_BUILTIN_MASKLOADQ256,
28705 IX86_BUILTIN_MASKSTORED,
28706 IX86_BUILTIN_MASKSTOREQ,
28707 IX86_BUILTIN_MASKSTORED256,
28708 IX86_BUILTIN_MASKSTOREQ256,
28709 IX86_BUILTIN_PSLLVV4DI,
28710 IX86_BUILTIN_PSLLVV2DI,
28711 IX86_BUILTIN_PSLLVV8SI,
28712 IX86_BUILTIN_PSLLVV4SI,
28713 IX86_BUILTIN_PSRAVV8SI,
28714 IX86_BUILTIN_PSRAVV4SI,
28715 IX86_BUILTIN_PSRLVV4DI,
28716 IX86_BUILTIN_PSRLVV2DI,
28717 IX86_BUILTIN_PSRLVV8SI,
28718 IX86_BUILTIN_PSRLVV4SI,
28720 IX86_BUILTIN_GATHERSIV2DF,
28721 IX86_BUILTIN_GATHERSIV4DF,
28722 IX86_BUILTIN_GATHERDIV2DF,
28723 IX86_BUILTIN_GATHERDIV4DF,
28724 IX86_BUILTIN_GATHERSIV4SF,
28725 IX86_BUILTIN_GATHERSIV8SF,
28726 IX86_BUILTIN_GATHERDIV4SF,
28727 IX86_BUILTIN_GATHERDIV8SF,
28728 IX86_BUILTIN_GATHERSIV2DI,
28729 IX86_BUILTIN_GATHERSIV4DI,
28730 IX86_BUILTIN_GATHERDIV2DI,
28731 IX86_BUILTIN_GATHERDIV4DI,
28732 IX86_BUILTIN_GATHERSIV4SI,
28733 IX86_BUILTIN_GATHERSIV8SI,
28734 IX86_BUILTIN_GATHERDIV4SI,
28735 IX86_BUILTIN_GATHERDIV8SI,
28737 /* AVX512F */
28738 IX86_BUILTIN_SI512_SI256,
28739 IX86_BUILTIN_PD512_PD256,
28740 IX86_BUILTIN_PS512_PS256,
28741 IX86_BUILTIN_SI512_SI,
28742 IX86_BUILTIN_PD512_PD,
28743 IX86_BUILTIN_PS512_PS,
28744 IX86_BUILTIN_ADDPD512,
28745 IX86_BUILTIN_ADDPS512,
28746 IX86_BUILTIN_ADDSD_ROUND,
28747 IX86_BUILTIN_ADDSS_ROUND,
28748 IX86_BUILTIN_ALIGND512,
28749 IX86_BUILTIN_ALIGNQ512,
28750 IX86_BUILTIN_BLENDMD512,
28751 IX86_BUILTIN_BLENDMPD512,
28752 IX86_BUILTIN_BLENDMPS512,
28753 IX86_BUILTIN_BLENDMQ512,
28754 IX86_BUILTIN_BROADCASTF32X4_512,
28755 IX86_BUILTIN_BROADCASTF64X4_512,
28756 IX86_BUILTIN_BROADCASTI32X4_512,
28757 IX86_BUILTIN_BROADCASTI64X4_512,
28758 IX86_BUILTIN_BROADCASTSD512,
28759 IX86_BUILTIN_BROADCASTSS512,
28760 IX86_BUILTIN_CMPD512,
28761 IX86_BUILTIN_CMPPD512,
28762 IX86_BUILTIN_CMPPS512,
28763 IX86_BUILTIN_CMPQ512,
28764 IX86_BUILTIN_CMPSD_MASK,
28765 IX86_BUILTIN_CMPSS_MASK,
28766 IX86_BUILTIN_COMIDF,
28767 IX86_BUILTIN_COMISF,
28768 IX86_BUILTIN_COMPRESSPD512,
28769 IX86_BUILTIN_COMPRESSPDSTORE512,
28770 IX86_BUILTIN_COMPRESSPS512,
28771 IX86_BUILTIN_COMPRESSPSSTORE512,
28772 IX86_BUILTIN_CVTDQ2PD512,
28773 IX86_BUILTIN_CVTDQ2PS512,
28774 IX86_BUILTIN_CVTPD2DQ512,
28775 IX86_BUILTIN_CVTPD2PS512,
28776 IX86_BUILTIN_CVTPD2UDQ512,
28777 IX86_BUILTIN_CVTPH2PS512,
28778 IX86_BUILTIN_CVTPS2DQ512,
28779 IX86_BUILTIN_CVTPS2PD512,
28780 IX86_BUILTIN_CVTPS2PH512,
28781 IX86_BUILTIN_CVTPS2UDQ512,
28782 IX86_BUILTIN_CVTSD2SS_ROUND,
28783 IX86_BUILTIN_CVTSI2SD64,
28784 IX86_BUILTIN_CVTSI2SS32,
28785 IX86_BUILTIN_CVTSI2SS64,
28786 IX86_BUILTIN_CVTSS2SD_ROUND,
28787 IX86_BUILTIN_CVTTPD2DQ512,
28788 IX86_BUILTIN_CVTTPD2UDQ512,
28789 IX86_BUILTIN_CVTTPS2DQ512,
28790 IX86_BUILTIN_CVTTPS2UDQ512,
28791 IX86_BUILTIN_CVTUDQ2PD512,
28792 IX86_BUILTIN_CVTUDQ2PS512,
28793 IX86_BUILTIN_CVTUSI2SD32,
28794 IX86_BUILTIN_CVTUSI2SD64,
28795 IX86_BUILTIN_CVTUSI2SS32,
28796 IX86_BUILTIN_CVTUSI2SS64,
28797 IX86_BUILTIN_DIVPD512,
28798 IX86_BUILTIN_DIVPS512,
28799 IX86_BUILTIN_DIVSD_ROUND,
28800 IX86_BUILTIN_DIVSS_ROUND,
28801 IX86_BUILTIN_EXPANDPD512,
28802 IX86_BUILTIN_EXPANDPD512Z,
28803 IX86_BUILTIN_EXPANDPDLOAD512,
28804 IX86_BUILTIN_EXPANDPDLOAD512Z,
28805 IX86_BUILTIN_EXPANDPS512,
28806 IX86_BUILTIN_EXPANDPS512Z,
28807 IX86_BUILTIN_EXPANDPSLOAD512,
28808 IX86_BUILTIN_EXPANDPSLOAD512Z,
28809 IX86_BUILTIN_EXTRACTF32X4,
28810 IX86_BUILTIN_EXTRACTF64X4,
28811 IX86_BUILTIN_EXTRACTI32X4,
28812 IX86_BUILTIN_EXTRACTI64X4,
28813 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28814 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28815 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28816 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28817 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28818 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28819 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28820 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28821 IX86_BUILTIN_GETEXPPD512,
28822 IX86_BUILTIN_GETEXPPS512,
28823 IX86_BUILTIN_GETEXPSD128,
28824 IX86_BUILTIN_GETEXPSS128,
28825 IX86_BUILTIN_GETMANTPD512,
28826 IX86_BUILTIN_GETMANTPS512,
28827 IX86_BUILTIN_GETMANTSD128,
28828 IX86_BUILTIN_GETMANTSS128,
28829 IX86_BUILTIN_INSERTF32X4,
28830 IX86_BUILTIN_INSERTF64X4,
28831 IX86_BUILTIN_INSERTI32X4,
28832 IX86_BUILTIN_INSERTI64X4,
28833 IX86_BUILTIN_LOADAPD512,
28834 IX86_BUILTIN_LOADAPS512,
28835 IX86_BUILTIN_LOADDQUDI512,
28836 IX86_BUILTIN_LOADDQUSI512,
28837 IX86_BUILTIN_LOADUPD512,
28838 IX86_BUILTIN_LOADUPS512,
28839 IX86_BUILTIN_MAXPD512,
28840 IX86_BUILTIN_MAXPS512,
28841 IX86_BUILTIN_MAXSD_ROUND,
28842 IX86_BUILTIN_MAXSS_ROUND,
28843 IX86_BUILTIN_MINPD512,
28844 IX86_BUILTIN_MINPS512,
28845 IX86_BUILTIN_MINSD_ROUND,
28846 IX86_BUILTIN_MINSS_ROUND,
28847 IX86_BUILTIN_MOVAPD512,
28848 IX86_BUILTIN_MOVAPS512,
28849 IX86_BUILTIN_MOVDDUP512,
28850 IX86_BUILTIN_MOVDQA32LOAD512,
28851 IX86_BUILTIN_MOVDQA32STORE512,
28852 IX86_BUILTIN_MOVDQA32_512,
28853 IX86_BUILTIN_MOVDQA64LOAD512,
28854 IX86_BUILTIN_MOVDQA64STORE512,
28855 IX86_BUILTIN_MOVDQA64_512,
28856 IX86_BUILTIN_MOVNTDQ512,
28857 IX86_BUILTIN_MOVNTDQA512,
28858 IX86_BUILTIN_MOVNTPD512,
28859 IX86_BUILTIN_MOVNTPS512,
28860 IX86_BUILTIN_MOVSHDUP512,
28861 IX86_BUILTIN_MOVSLDUP512,
28862 IX86_BUILTIN_MULPD512,
28863 IX86_BUILTIN_MULPS512,
28864 IX86_BUILTIN_MULSD_ROUND,
28865 IX86_BUILTIN_MULSS_ROUND,
28866 IX86_BUILTIN_PABSD512,
28867 IX86_BUILTIN_PABSQ512,
28868 IX86_BUILTIN_PADDD512,
28869 IX86_BUILTIN_PADDQ512,
28870 IX86_BUILTIN_PANDD512,
28871 IX86_BUILTIN_PANDND512,
28872 IX86_BUILTIN_PANDNQ512,
28873 IX86_BUILTIN_PANDQ512,
28874 IX86_BUILTIN_PBROADCASTD512,
28875 IX86_BUILTIN_PBROADCASTD512_GPR,
28876 IX86_BUILTIN_PBROADCASTMB512,
28877 IX86_BUILTIN_PBROADCASTMW512,
28878 IX86_BUILTIN_PBROADCASTQ512,
28879 IX86_BUILTIN_PBROADCASTQ512_GPR,
28880 IX86_BUILTIN_PCMPEQD512_MASK,
28881 IX86_BUILTIN_PCMPEQQ512_MASK,
28882 IX86_BUILTIN_PCMPGTD512_MASK,
28883 IX86_BUILTIN_PCMPGTQ512_MASK,
28884 IX86_BUILTIN_PCOMPRESSD512,
28885 IX86_BUILTIN_PCOMPRESSDSTORE512,
28886 IX86_BUILTIN_PCOMPRESSQ512,
28887 IX86_BUILTIN_PCOMPRESSQSTORE512,
28888 IX86_BUILTIN_PEXPANDD512,
28889 IX86_BUILTIN_PEXPANDD512Z,
28890 IX86_BUILTIN_PEXPANDDLOAD512,
28891 IX86_BUILTIN_PEXPANDDLOAD512Z,
28892 IX86_BUILTIN_PEXPANDQ512,
28893 IX86_BUILTIN_PEXPANDQ512Z,
28894 IX86_BUILTIN_PEXPANDQLOAD512,
28895 IX86_BUILTIN_PEXPANDQLOAD512Z,
28896 IX86_BUILTIN_PMAXSD512,
28897 IX86_BUILTIN_PMAXSQ512,
28898 IX86_BUILTIN_PMAXUD512,
28899 IX86_BUILTIN_PMAXUQ512,
28900 IX86_BUILTIN_PMINSD512,
28901 IX86_BUILTIN_PMINSQ512,
28902 IX86_BUILTIN_PMINUD512,
28903 IX86_BUILTIN_PMINUQ512,
28904 IX86_BUILTIN_PMOVDB512,
28905 IX86_BUILTIN_PMOVDB512_MEM,
28906 IX86_BUILTIN_PMOVDW512,
28907 IX86_BUILTIN_PMOVDW512_MEM,
28908 IX86_BUILTIN_PMOVQB512,
28909 IX86_BUILTIN_PMOVQB512_MEM,
28910 IX86_BUILTIN_PMOVQD512,
28911 IX86_BUILTIN_PMOVQD512_MEM,
28912 IX86_BUILTIN_PMOVQW512,
28913 IX86_BUILTIN_PMOVQW512_MEM,
28914 IX86_BUILTIN_PMOVSDB512,
28915 IX86_BUILTIN_PMOVSDB512_MEM,
28916 IX86_BUILTIN_PMOVSDW512,
28917 IX86_BUILTIN_PMOVSDW512_MEM,
28918 IX86_BUILTIN_PMOVSQB512,
28919 IX86_BUILTIN_PMOVSQB512_MEM,
28920 IX86_BUILTIN_PMOVSQD512,
28921 IX86_BUILTIN_PMOVSQD512_MEM,
28922 IX86_BUILTIN_PMOVSQW512,
28923 IX86_BUILTIN_PMOVSQW512_MEM,
28924 IX86_BUILTIN_PMOVSXBD512,
28925 IX86_BUILTIN_PMOVSXBQ512,
28926 IX86_BUILTIN_PMOVSXDQ512,
28927 IX86_BUILTIN_PMOVSXWD512,
28928 IX86_BUILTIN_PMOVSXWQ512,
28929 IX86_BUILTIN_PMOVUSDB512,
28930 IX86_BUILTIN_PMOVUSDB512_MEM,
28931 IX86_BUILTIN_PMOVUSDW512,
28932 IX86_BUILTIN_PMOVUSDW512_MEM,
28933 IX86_BUILTIN_PMOVUSQB512,
28934 IX86_BUILTIN_PMOVUSQB512_MEM,
28935 IX86_BUILTIN_PMOVUSQD512,
28936 IX86_BUILTIN_PMOVUSQD512_MEM,
28937 IX86_BUILTIN_PMOVUSQW512,
28938 IX86_BUILTIN_PMOVUSQW512_MEM,
28939 IX86_BUILTIN_PMOVZXBD512,
28940 IX86_BUILTIN_PMOVZXBQ512,
28941 IX86_BUILTIN_PMOVZXDQ512,
28942 IX86_BUILTIN_PMOVZXWD512,
28943 IX86_BUILTIN_PMOVZXWQ512,
28944 IX86_BUILTIN_PMULDQ512,
28945 IX86_BUILTIN_PMULLD512,
28946 IX86_BUILTIN_PMULUDQ512,
28947 IX86_BUILTIN_PORD512,
28948 IX86_BUILTIN_PORQ512,
28949 IX86_BUILTIN_PROLD512,
28950 IX86_BUILTIN_PROLQ512,
28951 IX86_BUILTIN_PROLVD512,
28952 IX86_BUILTIN_PROLVQ512,
28953 IX86_BUILTIN_PRORD512,
28954 IX86_BUILTIN_PRORQ512,
28955 IX86_BUILTIN_PRORVD512,
28956 IX86_BUILTIN_PRORVQ512,
28957 IX86_BUILTIN_PSHUFD512,
28958 IX86_BUILTIN_PSLLD512,
28959 IX86_BUILTIN_PSLLDI512,
28960 IX86_BUILTIN_PSLLQ512,
28961 IX86_BUILTIN_PSLLQI512,
28962 IX86_BUILTIN_PSLLVV16SI,
28963 IX86_BUILTIN_PSLLVV8DI,
28964 IX86_BUILTIN_PSRAD512,
28965 IX86_BUILTIN_PSRADI512,
28966 IX86_BUILTIN_PSRAQ512,
28967 IX86_BUILTIN_PSRAQI512,
28968 IX86_BUILTIN_PSRAVV16SI,
28969 IX86_BUILTIN_PSRAVV8DI,
28970 IX86_BUILTIN_PSRLD512,
28971 IX86_BUILTIN_PSRLDI512,
28972 IX86_BUILTIN_PSRLQ512,
28973 IX86_BUILTIN_PSRLQI512,
28974 IX86_BUILTIN_PSRLVV16SI,
28975 IX86_BUILTIN_PSRLVV8DI,
28976 IX86_BUILTIN_PSUBD512,
28977 IX86_BUILTIN_PSUBQ512,
28978 IX86_BUILTIN_PTESTMD512,
28979 IX86_BUILTIN_PTESTMQ512,
28980 IX86_BUILTIN_PTESTNMD512,
28981 IX86_BUILTIN_PTESTNMQ512,
28982 IX86_BUILTIN_PUNPCKHDQ512,
28983 IX86_BUILTIN_PUNPCKHQDQ512,
28984 IX86_BUILTIN_PUNPCKLDQ512,
28985 IX86_BUILTIN_PUNPCKLQDQ512,
28986 IX86_BUILTIN_PXORD512,
28987 IX86_BUILTIN_PXORQ512,
28988 IX86_BUILTIN_RCP14PD512,
28989 IX86_BUILTIN_RCP14PS512,
28990 IX86_BUILTIN_RCP14SD,
28991 IX86_BUILTIN_RCP14SS,
28992 IX86_BUILTIN_RNDSCALEPD,
28993 IX86_BUILTIN_RNDSCALEPS,
28994 IX86_BUILTIN_RNDSCALESD,
28995 IX86_BUILTIN_RNDSCALESS,
28996 IX86_BUILTIN_RSQRT14PD512,
28997 IX86_BUILTIN_RSQRT14PS512,
28998 IX86_BUILTIN_RSQRT14SD,
28999 IX86_BUILTIN_RSQRT14SS,
29000 IX86_BUILTIN_SCALEFPD512,
29001 IX86_BUILTIN_SCALEFPS512,
29002 IX86_BUILTIN_SCALEFSD,
29003 IX86_BUILTIN_SCALEFSS,
29004 IX86_BUILTIN_SHUFPD512,
29005 IX86_BUILTIN_SHUFPS512,
29006 IX86_BUILTIN_SHUF_F32x4,
29007 IX86_BUILTIN_SHUF_F64x2,
29008 IX86_BUILTIN_SHUF_I32x4,
29009 IX86_BUILTIN_SHUF_I64x2,
29010 IX86_BUILTIN_SQRTPD512,
29011 IX86_BUILTIN_SQRTPD512_MASK,
29012 IX86_BUILTIN_SQRTPS512_MASK,
29013 IX86_BUILTIN_SQRTPS_NR512,
29014 IX86_BUILTIN_SQRTSD_ROUND,
29015 IX86_BUILTIN_SQRTSS_ROUND,
29016 IX86_BUILTIN_STOREAPD512,
29017 IX86_BUILTIN_STOREAPS512,
29018 IX86_BUILTIN_STOREDQUDI512,
29019 IX86_BUILTIN_STOREDQUSI512,
29020 IX86_BUILTIN_STOREUPD512,
29021 IX86_BUILTIN_STOREUPS512,
29022 IX86_BUILTIN_SUBPD512,
29023 IX86_BUILTIN_SUBPS512,
29024 IX86_BUILTIN_SUBSD_ROUND,
29025 IX86_BUILTIN_SUBSS_ROUND,
29026 IX86_BUILTIN_UCMPD512,
29027 IX86_BUILTIN_UCMPQ512,
29028 IX86_BUILTIN_UNPCKHPD512,
29029 IX86_BUILTIN_UNPCKHPS512,
29030 IX86_BUILTIN_UNPCKLPD512,
29031 IX86_BUILTIN_UNPCKLPS512,
29032 IX86_BUILTIN_VCVTSD2SI32,
29033 IX86_BUILTIN_VCVTSD2SI64,
29034 IX86_BUILTIN_VCVTSD2USI32,
29035 IX86_BUILTIN_VCVTSD2USI64,
29036 IX86_BUILTIN_VCVTSS2SI32,
29037 IX86_BUILTIN_VCVTSS2SI64,
29038 IX86_BUILTIN_VCVTSS2USI32,
29039 IX86_BUILTIN_VCVTSS2USI64,
29040 IX86_BUILTIN_VCVTTSD2SI32,
29041 IX86_BUILTIN_VCVTTSD2SI64,
29042 IX86_BUILTIN_VCVTTSD2USI32,
29043 IX86_BUILTIN_VCVTTSD2USI64,
29044 IX86_BUILTIN_VCVTTSS2SI32,
29045 IX86_BUILTIN_VCVTTSS2SI64,
29046 IX86_BUILTIN_VCVTTSS2USI32,
29047 IX86_BUILTIN_VCVTTSS2USI64,
29048 IX86_BUILTIN_VFMADDPD512_MASK,
29049 IX86_BUILTIN_VFMADDPD512_MASK3,
29050 IX86_BUILTIN_VFMADDPD512_MASKZ,
29051 IX86_BUILTIN_VFMADDPS512_MASK,
29052 IX86_BUILTIN_VFMADDPS512_MASK3,
29053 IX86_BUILTIN_VFMADDPS512_MASKZ,
29054 IX86_BUILTIN_VFMADDSD3_ROUND,
29055 IX86_BUILTIN_VFMADDSS3_ROUND,
29056 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29057 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29058 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29059 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29060 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29061 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29062 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29063 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29064 IX86_BUILTIN_VFMSUBPD512_MASK3,
29065 IX86_BUILTIN_VFMSUBPS512_MASK3,
29066 IX86_BUILTIN_VFMSUBSD3_MASK3,
29067 IX86_BUILTIN_VFMSUBSS3_MASK3,
29068 IX86_BUILTIN_VFNMADDPD512_MASK,
29069 IX86_BUILTIN_VFNMADDPS512_MASK,
29070 IX86_BUILTIN_VFNMSUBPD512_MASK,
29071 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29072 IX86_BUILTIN_VFNMSUBPS512_MASK,
29073 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29074 IX86_BUILTIN_VPCLZCNTD512,
29075 IX86_BUILTIN_VPCLZCNTQ512,
29076 IX86_BUILTIN_VPCONFLICTD512,
29077 IX86_BUILTIN_VPCONFLICTQ512,
29078 IX86_BUILTIN_VPERMDF512,
29079 IX86_BUILTIN_VPERMDI512,
29080 IX86_BUILTIN_VPERMI2VARD512,
29081 IX86_BUILTIN_VPERMI2VARPD512,
29082 IX86_BUILTIN_VPERMI2VARPS512,
29083 IX86_BUILTIN_VPERMI2VARQ512,
29084 IX86_BUILTIN_VPERMILPD512,
29085 IX86_BUILTIN_VPERMILPS512,
29086 IX86_BUILTIN_VPERMILVARPD512,
29087 IX86_BUILTIN_VPERMILVARPS512,
29088 IX86_BUILTIN_VPERMT2VARD512,
29089 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29090 IX86_BUILTIN_VPERMT2VARPD512,
29091 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29092 IX86_BUILTIN_VPERMT2VARPS512,
29093 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29094 IX86_BUILTIN_VPERMT2VARQ512,
29095 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29096 IX86_BUILTIN_VPERMVARDF512,
29097 IX86_BUILTIN_VPERMVARDI512,
29098 IX86_BUILTIN_VPERMVARSF512,
29099 IX86_BUILTIN_VPERMVARSI512,
29100 IX86_BUILTIN_VTERNLOGD512_MASK,
29101 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29102 IX86_BUILTIN_VTERNLOGQ512_MASK,
29103 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29105 /* Mask arithmetic operations */
29106 IX86_BUILTIN_KAND16,
29107 IX86_BUILTIN_KANDN16,
29108 IX86_BUILTIN_KNOT16,
29109 IX86_BUILTIN_KOR16,
29110 IX86_BUILTIN_KORTESTC16,
29111 IX86_BUILTIN_KORTESTZ16,
29112 IX86_BUILTIN_KUNPCKBW,
29113 IX86_BUILTIN_KXNOR16,
29114 IX86_BUILTIN_KXOR16,
29115 IX86_BUILTIN_KMOV16,
29117 /* AVX512VL. */
29118 IX86_BUILTIN_PMOVUSQD256_MEM,
29119 IX86_BUILTIN_PMOVUSQD128_MEM,
29120 IX86_BUILTIN_PMOVSQD256_MEM,
29121 IX86_BUILTIN_PMOVSQD128_MEM,
29122 IX86_BUILTIN_PMOVQD256_MEM,
29123 IX86_BUILTIN_PMOVQD128_MEM,
29124 IX86_BUILTIN_PMOVUSQW256_MEM,
29125 IX86_BUILTIN_PMOVUSQW128_MEM,
29126 IX86_BUILTIN_PMOVSQW256_MEM,
29127 IX86_BUILTIN_PMOVSQW128_MEM,
29128 IX86_BUILTIN_PMOVQW256_MEM,
29129 IX86_BUILTIN_PMOVQW128_MEM,
29130 IX86_BUILTIN_PMOVUSQB256_MEM,
29131 IX86_BUILTIN_PMOVUSQB128_MEM,
29132 IX86_BUILTIN_PMOVSQB256_MEM,
29133 IX86_BUILTIN_PMOVSQB128_MEM,
29134 IX86_BUILTIN_PMOVQB256_MEM,
29135 IX86_BUILTIN_PMOVQB128_MEM,
29136 IX86_BUILTIN_PMOVUSDW256_MEM,
29137 IX86_BUILTIN_PMOVUSDW128_MEM,
29138 IX86_BUILTIN_PMOVSDW256_MEM,
29139 IX86_BUILTIN_PMOVSDW128_MEM,
29140 IX86_BUILTIN_PMOVDW256_MEM,
29141 IX86_BUILTIN_PMOVDW128_MEM,
29142 IX86_BUILTIN_PMOVUSDB256_MEM,
29143 IX86_BUILTIN_PMOVUSDB128_MEM,
29144 IX86_BUILTIN_PMOVSDB256_MEM,
29145 IX86_BUILTIN_PMOVSDB128_MEM,
29146 IX86_BUILTIN_PMOVDB256_MEM,
29147 IX86_BUILTIN_PMOVDB128_MEM,
29148 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29149 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29150 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29151 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29152 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29153 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29154 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29155 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29156 IX86_BUILTIN_LOADAPD256_MASK,
29157 IX86_BUILTIN_LOADAPD128_MASK,
29158 IX86_BUILTIN_LOADAPS256_MASK,
29159 IX86_BUILTIN_LOADAPS128_MASK,
29160 IX86_BUILTIN_STOREAPD256_MASK,
29161 IX86_BUILTIN_STOREAPD128_MASK,
29162 IX86_BUILTIN_STOREAPS256_MASK,
29163 IX86_BUILTIN_STOREAPS128_MASK,
29164 IX86_BUILTIN_LOADUPD256_MASK,
29165 IX86_BUILTIN_LOADUPD128_MASK,
29166 IX86_BUILTIN_LOADUPS256_MASK,
29167 IX86_BUILTIN_LOADUPS128_MASK,
29168 IX86_BUILTIN_STOREUPD256_MASK,
29169 IX86_BUILTIN_STOREUPD128_MASK,
29170 IX86_BUILTIN_STOREUPS256_MASK,
29171 IX86_BUILTIN_STOREUPS128_MASK,
29172 IX86_BUILTIN_LOADDQUDI256_MASK,
29173 IX86_BUILTIN_LOADDQUDI128_MASK,
29174 IX86_BUILTIN_LOADDQUSI256_MASK,
29175 IX86_BUILTIN_LOADDQUSI128_MASK,
29176 IX86_BUILTIN_LOADDQUHI256_MASK,
29177 IX86_BUILTIN_LOADDQUHI128_MASK,
29178 IX86_BUILTIN_LOADDQUQI256_MASK,
29179 IX86_BUILTIN_LOADDQUQI128_MASK,
29180 IX86_BUILTIN_STOREDQUDI256_MASK,
29181 IX86_BUILTIN_STOREDQUDI128_MASK,
29182 IX86_BUILTIN_STOREDQUSI256_MASK,
29183 IX86_BUILTIN_STOREDQUSI128_MASK,
29184 IX86_BUILTIN_STOREDQUHI256_MASK,
29185 IX86_BUILTIN_STOREDQUHI128_MASK,
29186 IX86_BUILTIN_STOREDQUQI256_MASK,
29187 IX86_BUILTIN_STOREDQUQI128_MASK,
29188 IX86_BUILTIN_COMPRESSPDSTORE256,
29189 IX86_BUILTIN_COMPRESSPDSTORE128,
29190 IX86_BUILTIN_COMPRESSPSSTORE256,
29191 IX86_BUILTIN_COMPRESSPSSTORE128,
29192 IX86_BUILTIN_PCOMPRESSQSTORE256,
29193 IX86_BUILTIN_PCOMPRESSQSTORE128,
29194 IX86_BUILTIN_PCOMPRESSDSTORE256,
29195 IX86_BUILTIN_PCOMPRESSDSTORE128,
29196 IX86_BUILTIN_EXPANDPDLOAD256,
29197 IX86_BUILTIN_EXPANDPDLOAD128,
29198 IX86_BUILTIN_EXPANDPSLOAD256,
29199 IX86_BUILTIN_EXPANDPSLOAD128,
29200 IX86_BUILTIN_PEXPANDQLOAD256,
29201 IX86_BUILTIN_PEXPANDQLOAD128,
29202 IX86_BUILTIN_PEXPANDDLOAD256,
29203 IX86_BUILTIN_PEXPANDDLOAD128,
29204 IX86_BUILTIN_EXPANDPDLOAD256Z,
29205 IX86_BUILTIN_EXPANDPDLOAD128Z,
29206 IX86_BUILTIN_EXPANDPSLOAD256Z,
29207 IX86_BUILTIN_EXPANDPSLOAD128Z,
29208 IX86_BUILTIN_PEXPANDQLOAD256Z,
29209 IX86_BUILTIN_PEXPANDQLOAD128Z,
29210 IX86_BUILTIN_PEXPANDDLOAD256Z,
29211 IX86_BUILTIN_PEXPANDDLOAD128Z,
29212 IX86_BUILTIN_PALIGNR256_MASK,
29213 IX86_BUILTIN_PALIGNR128_MASK,
29214 IX86_BUILTIN_MOVDQA64_256_MASK,
29215 IX86_BUILTIN_MOVDQA64_128_MASK,
29216 IX86_BUILTIN_MOVDQA32_256_MASK,
29217 IX86_BUILTIN_MOVDQA32_128_MASK,
29218 IX86_BUILTIN_MOVAPD256_MASK,
29219 IX86_BUILTIN_MOVAPD128_MASK,
29220 IX86_BUILTIN_MOVAPS256_MASK,
29221 IX86_BUILTIN_MOVAPS128_MASK,
29222 IX86_BUILTIN_MOVDQUHI256_MASK,
29223 IX86_BUILTIN_MOVDQUHI128_MASK,
29224 IX86_BUILTIN_MOVDQUQI256_MASK,
29225 IX86_BUILTIN_MOVDQUQI128_MASK,
29226 IX86_BUILTIN_MINPS128_MASK,
29227 IX86_BUILTIN_MAXPS128_MASK,
29228 IX86_BUILTIN_MINPD128_MASK,
29229 IX86_BUILTIN_MAXPD128_MASK,
29230 IX86_BUILTIN_MAXPD256_MASK,
29231 IX86_BUILTIN_MAXPS256_MASK,
29232 IX86_BUILTIN_MINPD256_MASK,
29233 IX86_BUILTIN_MINPS256_MASK,
29234 IX86_BUILTIN_MULPS128_MASK,
29235 IX86_BUILTIN_DIVPS128_MASK,
29236 IX86_BUILTIN_MULPD128_MASK,
29237 IX86_BUILTIN_DIVPD128_MASK,
29238 IX86_BUILTIN_DIVPD256_MASK,
29239 IX86_BUILTIN_DIVPS256_MASK,
29240 IX86_BUILTIN_MULPD256_MASK,
29241 IX86_BUILTIN_MULPS256_MASK,
29242 IX86_BUILTIN_ADDPD128_MASK,
29243 IX86_BUILTIN_ADDPD256_MASK,
29244 IX86_BUILTIN_ADDPS128_MASK,
29245 IX86_BUILTIN_ADDPS256_MASK,
29246 IX86_BUILTIN_SUBPD128_MASK,
29247 IX86_BUILTIN_SUBPD256_MASK,
29248 IX86_BUILTIN_SUBPS128_MASK,
29249 IX86_BUILTIN_SUBPS256_MASK,
29250 IX86_BUILTIN_XORPD256_MASK,
29251 IX86_BUILTIN_XORPD128_MASK,
29252 IX86_BUILTIN_XORPS256_MASK,
29253 IX86_BUILTIN_XORPS128_MASK,
29254 IX86_BUILTIN_ORPD256_MASK,
29255 IX86_BUILTIN_ORPD128_MASK,
29256 IX86_BUILTIN_ORPS256_MASK,
29257 IX86_BUILTIN_ORPS128_MASK,
29258 IX86_BUILTIN_BROADCASTF32x2_256,
29259 IX86_BUILTIN_BROADCASTI32x2_256,
29260 IX86_BUILTIN_BROADCASTI32x2_128,
29261 IX86_BUILTIN_BROADCASTF64X2_256,
29262 IX86_BUILTIN_BROADCASTI64X2_256,
29263 IX86_BUILTIN_BROADCASTF32X4_256,
29264 IX86_BUILTIN_BROADCASTI32X4_256,
29265 IX86_BUILTIN_EXTRACTF32X4_256,
29266 IX86_BUILTIN_EXTRACTI32X4_256,
29267 IX86_BUILTIN_DBPSADBW256,
29268 IX86_BUILTIN_DBPSADBW128,
29269 IX86_BUILTIN_CVTTPD2QQ256,
29270 IX86_BUILTIN_CVTTPD2QQ128,
29271 IX86_BUILTIN_CVTTPD2UQQ256,
29272 IX86_BUILTIN_CVTTPD2UQQ128,
29273 IX86_BUILTIN_CVTPD2QQ256,
29274 IX86_BUILTIN_CVTPD2QQ128,
29275 IX86_BUILTIN_CVTPD2UQQ256,
29276 IX86_BUILTIN_CVTPD2UQQ128,
29277 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29278 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29279 IX86_BUILTIN_CVTTPS2QQ256,
29280 IX86_BUILTIN_CVTTPS2QQ128,
29281 IX86_BUILTIN_CVTTPS2UQQ256,
29282 IX86_BUILTIN_CVTTPS2UQQ128,
29283 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29284 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29285 IX86_BUILTIN_CVTTPS2UDQ256,
29286 IX86_BUILTIN_CVTTPS2UDQ128,
29287 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29288 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29289 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29290 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29291 IX86_BUILTIN_CVTPD2DQ256_MASK,
29292 IX86_BUILTIN_CVTPD2DQ128_MASK,
29293 IX86_BUILTIN_CVTDQ2PD256_MASK,
29294 IX86_BUILTIN_CVTDQ2PD128_MASK,
29295 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29296 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29297 IX86_BUILTIN_CVTDQ2PS256_MASK,
29298 IX86_BUILTIN_CVTDQ2PS128_MASK,
29299 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29300 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29301 IX86_BUILTIN_CVTPS2PD256_MASK,
29302 IX86_BUILTIN_CVTPS2PD128_MASK,
29303 IX86_BUILTIN_PBROADCASTB256_MASK,
29304 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29305 IX86_BUILTIN_PBROADCASTB128_MASK,
29306 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29307 IX86_BUILTIN_PBROADCASTW256_MASK,
29308 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29309 IX86_BUILTIN_PBROADCASTW128_MASK,
29310 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29311 IX86_BUILTIN_PBROADCASTD256_MASK,
29312 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29313 IX86_BUILTIN_PBROADCASTD128_MASK,
29314 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29315 IX86_BUILTIN_PBROADCASTQ256_MASK,
29316 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29317 IX86_BUILTIN_PBROADCASTQ128_MASK,
29318 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29319 IX86_BUILTIN_BROADCASTSS256,
29320 IX86_BUILTIN_BROADCASTSS128,
29321 IX86_BUILTIN_BROADCASTSD256,
29322 IX86_BUILTIN_EXTRACTF64X2_256,
29323 IX86_BUILTIN_EXTRACTI64X2_256,
29324 IX86_BUILTIN_INSERTF32X4_256,
29325 IX86_BUILTIN_INSERTI32X4_256,
29326 IX86_BUILTIN_PMOVSXBW256_MASK,
29327 IX86_BUILTIN_PMOVSXBW128_MASK,
29328 IX86_BUILTIN_PMOVSXBD256_MASK,
29329 IX86_BUILTIN_PMOVSXBD128_MASK,
29330 IX86_BUILTIN_PMOVSXBQ256_MASK,
29331 IX86_BUILTIN_PMOVSXBQ128_MASK,
29332 IX86_BUILTIN_PMOVSXWD256_MASK,
29333 IX86_BUILTIN_PMOVSXWD128_MASK,
29334 IX86_BUILTIN_PMOVSXWQ256_MASK,
29335 IX86_BUILTIN_PMOVSXWQ128_MASK,
29336 IX86_BUILTIN_PMOVSXDQ256_MASK,
29337 IX86_BUILTIN_PMOVSXDQ128_MASK,
29338 IX86_BUILTIN_PMOVZXBW256_MASK,
29339 IX86_BUILTIN_PMOVZXBW128_MASK,
29340 IX86_BUILTIN_PMOVZXBD256_MASK,
29341 IX86_BUILTIN_PMOVZXBD128_MASK,
29342 IX86_BUILTIN_PMOVZXBQ256_MASK,
29343 IX86_BUILTIN_PMOVZXBQ128_MASK,
29344 IX86_BUILTIN_PMOVZXWD256_MASK,
29345 IX86_BUILTIN_PMOVZXWD128_MASK,
29346 IX86_BUILTIN_PMOVZXWQ256_MASK,
29347 IX86_BUILTIN_PMOVZXWQ128_MASK,
29348 IX86_BUILTIN_PMOVZXDQ256_MASK,
29349 IX86_BUILTIN_PMOVZXDQ128_MASK,
29350 IX86_BUILTIN_REDUCEPD256_MASK,
29351 IX86_BUILTIN_REDUCEPD128_MASK,
29352 IX86_BUILTIN_REDUCEPS256_MASK,
29353 IX86_BUILTIN_REDUCEPS128_MASK,
29354 IX86_BUILTIN_REDUCESD_MASK,
29355 IX86_BUILTIN_REDUCESS_MASK,
29356 IX86_BUILTIN_VPERMVARHI256_MASK,
29357 IX86_BUILTIN_VPERMVARHI128_MASK,
29358 IX86_BUILTIN_VPERMT2VARHI256,
29359 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29360 IX86_BUILTIN_VPERMT2VARHI128,
29361 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29362 IX86_BUILTIN_VPERMI2VARHI256,
29363 IX86_BUILTIN_VPERMI2VARHI128,
29364 IX86_BUILTIN_RCP14PD256,
29365 IX86_BUILTIN_RCP14PD128,
29366 IX86_BUILTIN_RCP14PS256,
29367 IX86_BUILTIN_RCP14PS128,
29368 IX86_BUILTIN_RSQRT14PD256_MASK,
29369 IX86_BUILTIN_RSQRT14PD128_MASK,
29370 IX86_BUILTIN_RSQRT14PS256_MASK,
29371 IX86_BUILTIN_RSQRT14PS128_MASK,
29372 IX86_BUILTIN_SQRTPD256_MASK,
29373 IX86_BUILTIN_SQRTPD128_MASK,
29374 IX86_BUILTIN_SQRTPS256_MASK,
29375 IX86_BUILTIN_SQRTPS128_MASK,
29376 IX86_BUILTIN_PADDB128_MASK,
29377 IX86_BUILTIN_PADDW128_MASK,
29378 IX86_BUILTIN_PADDD128_MASK,
29379 IX86_BUILTIN_PADDQ128_MASK,
29380 IX86_BUILTIN_PSUBB128_MASK,
29381 IX86_BUILTIN_PSUBW128_MASK,
29382 IX86_BUILTIN_PSUBD128_MASK,
29383 IX86_BUILTIN_PSUBQ128_MASK,
29384 IX86_BUILTIN_PADDSB128_MASK,
29385 IX86_BUILTIN_PADDSW128_MASK,
29386 IX86_BUILTIN_PSUBSB128_MASK,
29387 IX86_BUILTIN_PSUBSW128_MASK,
29388 IX86_BUILTIN_PADDUSB128_MASK,
29389 IX86_BUILTIN_PADDUSW128_MASK,
29390 IX86_BUILTIN_PSUBUSB128_MASK,
29391 IX86_BUILTIN_PSUBUSW128_MASK,
29392 IX86_BUILTIN_PADDB256_MASK,
29393 IX86_BUILTIN_PADDW256_MASK,
29394 IX86_BUILTIN_PADDD256_MASK,
29395 IX86_BUILTIN_PADDQ256_MASK,
29396 IX86_BUILTIN_PADDSB256_MASK,
29397 IX86_BUILTIN_PADDSW256_MASK,
29398 IX86_BUILTIN_PADDUSB256_MASK,
29399 IX86_BUILTIN_PADDUSW256_MASK,
29400 IX86_BUILTIN_PSUBB256_MASK,
29401 IX86_BUILTIN_PSUBW256_MASK,
29402 IX86_BUILTIN_PSUBD256_MASK,
29403 IX86_BUILTIN_PSUBQ256_MASK,
29404 IX86_BUILTIN_PSUBSB256_MASK,
29405 IX86_BUILTIN_PSUBSW256_MASK,
29406 IX86_BUILTIN_PSUBUSB256_MASK,
29407 IX86_BUILTIN_PSUBUSW256_MASK,
29408 IX86_BUILTIN_SHUF_F64x2_256,
29409 IX86_BUILTIN_SHUF_I64x2_256,
29410 IX86_BUILTIN_SHUF_I32x4_256,
29411 IX86_BUILTIN_SHUF_F32x4_256,
29412 IX86_BUILTIN_PMOVWB128,
29413 IX86_BUILTIN_PMOVWB256,
29414 IX86_BUILTIN_PMOVSWB128,
29415 IX86_BUILTIN_PMOVSWB256,
29416 IX86_BUILTIN_PMOVUSWB128,
29417 IX86_BUILTIN_PMOVUSWB256,
29418 IX86_BUILTIN_PMOVDB128,
29419 IX86_BUILTIN_PMOVDB256,
29420 IX86_BUILTIN_PMOVSDB128,
29421 IX86_BUILTIN_PMOVSDB256,
29422 IX86_BUILTIN_PMOVUSDB128,
29423 IX86_BUILTIN_PMOVUSDB256,
29424 IX86_BUILTIN_PMOVDW128,
29425 IX86_BUILTIN_PMOVDW256,
29426 IX86_BUILTIN_PMOVSDW128,
29427 IX86_BUILTIN_PMOVSDW256,
29428 IX86_BUILTIN_PMOVUSDW128,
29429 IX86_BUILTIN_PMOVUSDW256,
29430 IX86_BUILTIN_PMOVQB128,
29431 IX86_BUILTIN_PMOVQB256,
29432 IX86_BUILTIN_PMOVSQB128,
29433 IX86_BUILTIN_PMOVSQB256,
29434 IX86_BUILTIN_PMOVUSQB128,
29435 IX86_BUILTIN_PMOVUSQB256,
29436 IX86_BUILTIN_PMOVQW128,
29437 IX86_BUILTIN_PMOVQW256,
29438 IX86_BUILTIN_PMOVSQW128,
29439 IX86_BUILTIN_PMOVSQW256,
29440 IX86_BUILTIN_PMOVUSQW128,
29441 IX86_BUILTIN_PMOVUSQW256,
29442 IX86_BUILTIN_PMOVQD128,
29443 IX86_BUILTIN_PMOVQD256,
29444 IX86_BUILTIN_PMOVSQD128,
29445 IX86_BUILTIN_PMOVSQD256,
29446 IX86_BUILTIN_PMOVUSQD128,
29447 IX86_BUILTIN_PMOVUSQD256,
29448 IX86_BUILTIN_RANGEPD256,
29449 IX86_BUILTIN_RANGEPD128,
29450 IX86_BUILTIN_RANGEPS256,
29451 IX86_BUILTIN_RANGEPS128,
29452 IX86_BUILTIN_GETEXPPS256,
29453 IX86_BUILTIN_GETEXPPD256,
29454 IX86_BUILTIN_GETEXPPS128,
29455 IX86_BUILTIN_GETEXPPD128,
29456 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29457 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29458 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29459 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29460 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29461 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29462 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29463 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29464 IX86_BUILTIN_PABSQ256,
29465 IX86_BUILTIN_PABSQ128,
29466 IX86_BUILTIN_PABSD256_MASK,
29467 IX86_BUILTIN_PABSD128_MASK,
29468 IX86_BUILTIN_PMULHRSW256_MASK,
29469 IX86_BUILTIN_PMULHRSW128_MASK,
29470 IX86_BUILTIN_PMULHUW128_MASK,
29471 IX86_BUILTIN_PMULHUW256_MASK,
29472 IX86_BUILTIN_PMULHW256_MASK,
29473 IX86_BUILTIN_PMULHW128_MASK,
29474 IX86_BUILTIN_PMULLW256_MASK,
29475 IX86_BUILTIN_PMULLW128_MASK,
29476 IX86_BUILTIN_PMULLQ256,
29477 IX86_BUILTIN_PMULLQ128,
29478 IX86_BUILTIN_ANDPD256_MASK,
29479 IX86_BUILTIN_ANDPD128_MASK,
29480 IX86_BUILTIN_ANDPS256_MASK,
29481 IX86_BUILTIN_ANDPS128_MASK,
29482 IX86_BUILTIN_ANDNPD256_MASK,
29483 IX86_BUILTIN_ANDNPD128_MASK,
29484 IX86_BUILTIN_ANDNPS256_MASK,
29485 IX86_BUILTIN_ANDNPS128_MASK,
29486 IX86_BUILTIN_PSLLWI128_MASK,
29487 IX86_BUILTIN_PSLLDI128_MASK,
29488 IX86_BUILTIN_PSLLQI128_MASK,
29489 IX86_BUILTIN_PSLLW128_MASK,
29490 IX86_BUILTIN_PSLLD128_MASK,
29491 IX86_BUILTIN_PSLLQ128_MASK,
29492 IX86_BUILTIN_PSLLWI256_MASK,
29493 IX86_BUILTIN_PSLLW256_MASK,
29494 IX86_BUILTIN_PSLLDI256_MASK,
29495 IX86_BUILTIN_PSLLD256_MASK,
29496 IX86_BUILTIN_PSLLQI256_MASK,
29497 IX86_BUILTIN_PSLLQ256_MASK,
29498 IX86_BUILTIN_PSRADI128_MASK,
29499 IX86_BUILTIN_PSRAD128_MASK,
29500 IX86_BUILTIN_PSRADI256_MASK,
29501 IX86_BUILTIN_PSRAD256_MASK,
29502 IX86_BUILTIN_PSRAQI128_MASK,
29503 IX86_BUILTIN_PSRAQ128_MASK,
29504 IX86_BUILTIN_PSRAQI256_MASK,
29505 IX86_BUILTIN_PSRAQ256_MASK,
29506 IX86_BUILTIN_PANDD256,
29507 IX86_BUILTIN_PANDD128,
29508 IX86_BUILTIN_PSRLDI128_MASK,
29509 IX86_BUILTIN_PSRLD128_MASK,
29510 IX86_BUILTIN_PSRLDI256_MASK,
29511 IX86_BUILTIN_PSRLD256_MASK,
29512 IX86_BUILTIN_PSRLQI128_MASK,
29513 IX86_BUILTIN_PSRLQ128_MASK,
29514 IX86_BUILTIN_PSRLQI256_MASK,
29515 IX86_BUILTIN_PSRLQ256_MASK,
29516 IX86_BUILTIN_PANDQ256,
29517 IX86_BUILTIN_PANDQ128,
29518 IX86_BUILTIN_PANDND256,
29519 IX86_BUILTIN_PANDND128,
29520 IX86_BUILTIN_PANDNQ256,
29521 IX86_BUILTIN_PANDNQ128,
29522 IX86_BUILTIN_PORD256,
29523 IX86_BUILTIN_PORD128,
29524 IX86_BUILTIN_PORQ256,
29525 IX86_BUILTIN_PORQ128,
29526 IX86_BUILTIN_PXORD256,
29527 IX86_BUILTIN_PXORD128,
29528 IX86_BUILTIN_PXORQ256,
29529 IX86_BUILTIN_PXORQ128,
29530 IX86_BUILTIN_PACKSSWB256_MASK,
29531 IX86_BUILTIN_PACKSSWB128_MASK,
29532 IX86_BUILTIN_PACKUSWB256_MASK,
29533 IX86_BUILTIN_PACKUSWB128_MASK,
29534 IX86_BUILTIN_RNDSCALEPS256,
29535 IX86_BUILTIN_RNDSCALEPD256,
29536 IX86_BUILTIN_RNDSCALEPS128,
29537 IX86_BUILTIN_RNDSCALEPD128,
29538 IX86_BUILTIN_VTERNLOGQ256_MASK,
29539 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29540 IX86_BUILTIN_VTERNLOGD256_MASK,
29541 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29542 IX86_BUILTIN_VTERNLOGQ128_MASK,
29543 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29544 IX86_BUILTIN_VTERNLOGD128_MASK,
29545 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29546 IX86_BUILTIN_SCALEFPD256,
29547 IX86_BUILTIN_SCALEFPS256,
29548 IX86_BUILTIN_SCALEFPD128,
29549 IX86_BUILTIN_SCALEFPS128,
29550 IX86_BUILTIN_VFMADDPD256_MASK,
29551 IX86_BUILTIN_VFMADDPD256_MASK3,
29552 IX86_BUILTIN_VFMADDPD256_MASKZ,
29553 IX86_BUILTIN_VFMADDPD128_MASK,
29554 IX86_BUILTIN_VFMADDPD128_MASK3,
29555 IX86_BUILTIN_VFMADDPD128_MASKZ,
29556 IX86_BUILTIN_VFMADDPS256_MASK,
29557 IX86_BUILTIN_VFMADDPS256_MASK3,
29558 IX86_BUILTIN_VFMADDPS256_MASKZ,
29559 IX86_BUILTIN_VFMADDPS128_MASK,
29560 IX86_BUILTIN_VFMADDPS128_MASK3,
29561 IX86_BUILTIN_VFMADDPS128_MASKZ,
29562 IX86_BUILTIN_VFMSUBPD256_MASK3,
29563 IX86_BUILTIN_VFMSUBPD128_MASK3,
29564 IX86_BUILTIN_VFMSUBPS256_MASK3,
29565 IX86_BUILTIN_VFMSUBPS128_MASK3,
29566 IX86_BUILTIN_VFNMADDPD256_MASK,
29567 IX86_BUILTIN_VFNMADDPD128_MASK,
29568 IX86_BUILTIN_VFNMADDPS256_MASK,
29569 IX86_BUILTIN_VFNMADDPS128_MASK,
29570 IX86_BUILTIN_VFNMSUBPD256_MASK,
29571 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29572 IX86_BUILTIN_VFNMSUBPD128_MASK,
29573 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29574 IX86_BUILTIN_VFNMSUBPS256_MASK,
29575 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29576 IX86_BUILTIN_VFNMSUBPS128_MASK,
29577 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29578 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29579 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29580 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29581 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29582 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29583 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29584 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29585 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29586 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29587 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29588 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29589 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29590 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29591 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29592 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29593 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29594 IX86_BUILTIN_INSERTF64X2_256,
29595 IX86_BUILTIN_INSERTI64X2_256,
29596 IX86_BUILTIN_PSRAVV16HI,
29597 IX86_BUILTIN_PSRAVV8HI,
29598 IX86_BUILTIN_PMADDUBSW256_MASK,
29599 IX86_BUILTIN_PMADDUBSW128_MASK,
29600 IX86_BUILTIN_PMADDWD256_MASK,
29601 IX86_BUILTIN_PMADDWD128_MASK,
29602 IX86_BUILTIN_PSRLVV16HI,
29603 IX86_BUILTIN_PSRLVV8HI,
29604 IX86_BUILTIN_CVTPS2DQ256_MASK,
29605 IX86_BUILTIN_CVTPS2DQ128_MASK,
29606 IX86_BUILTIN_CVTPS2UDQ256,
29607 IX86_BUILTIN_CVTPS2UDQ128,
29608 IX86_BUILTIN_CVTPS2QQ256,
29609 IX86_BUILTIN_CVTPS2QQ128,
29610 IX86_BUILTIN_CVTPS2UQQ256,
29611 IX86_BUILTIN_CVTPS2UQQ128,
29612 IX86_BUILTIN_GETMANTPS256,
29613 IX86_BUILTIN_GETMANTPS128,
29614 IX86_BUILTIN_GETMANTPD256,
29615 IX86_BUILTIN_GETMANTPD128,
29616 IX86_BUILTIN_MOVDDUP256_MASK,
29617 IX86_BUILTIN_MOVDDUP128_MASK,
29618 IX86_BUILTIN_MOVSHDUP256_MASK,
29619 IX86_BUILTIN_MOVSHDUP128_MASK,
29620 IX86_BUILTIN_MOVSLDUP256_MASK,
29621 IX86_BUILTIN_MOVSLDUP128_MASK,
29622 IX86_BUILTIN_CVTQQ2PS256,
29623 IX86_BUILTIN_CVTQQ2PS128,
29624 IX86_BUILTIN_CVTUQQ2PS256,
29625 IX86_BUILTIN_CVTUQQ2PS128,
29626 IX86_BUILTIN_CVTQQ2PD256,
29627 IX86_BUILTIN_CVTQQ2PD128,
29628 IX86_BUILTIN_CVTUQQ2PD256,
29629 IX86_BUILTIN_CVTUQQ2PD128,
29630 IX86_BUILTIN_VPERMT2VARQ256,
29631 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29632 IX86_BUILTIN_VPERMT2VARD256,
29633 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29634 IX86_BUILTIN_VPERMI2VARQ256,
29635 IX86_BUILTIN_VPERMI2VARD256,
29636 IX86_BUILTIN_VPERMT2VARPD256,
29637 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29638 IX86_BUILTIN_VPERMT2VARPS256,
29639 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29640 IX86_BUILTIN_VPERMI2VARPD256,
29641 IX86_BUILTIN_VPERMI2VARPS256,
29642 IX86_BUILTIN_VPERMT2VARQ128,
29643 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29644 IX86_BUILTIN_VPERMT2VARD128,
29645 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29646 IX86_BUILTIN_VPERMI2VARQ128,
29647 IX86_BUILTIN_VPERMI2VARD128,
29648 IX86_BUILTIN_VPERMT2VARPD128,
29649 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29650 IX86_BUILTIN_VPERMT2VARPS128,
29651 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29652 IX86_BUILTIN_VPERMI2VARPD128,
29653 IX86_BUILTIN_VPERMI2VARPS128,
29654 IX86_BUILTIN_PSHUFB256_MASK,
29655 IX86_BUILTIN_PSHUFB128_MASK,
29656 IX86_BUILTIN_PSHUFHW256_MASK,
29657 IX86_BUILTIN_PSHUFHW128_MASK,
29658 IX86_BUILTIN_PSHUFLW256_MASK,
29659 IX86_BUILTIN_PSHUFLW128_MASK,
29660 IX86_BUILTIN_PSHUFD256_MASK,
29661 IX86_BUILTIN_PSHUFD128_MASK,
29662 IX86_BUILTIN_SHUFPD256_MASK,
29663 IX86_BUILTIN_SHUFPD128_MASK,
29664 IX86_BUILTIN_SHUFPS256_MASK,
29665 IX86_BUILTIN_SHUFPS128_MASK,
29666 IX86_BUILTIN_PROLVQ256,
29667 IX86_BUILTIN_PROLVQ128,
29668 IX86_BUILTIN_PROLQ256,
29669 IX86_BUILTIN_PROLQ128,
29670 IX86_BUILTIN_PRORVQ256,
29671 IX86_BUILTIN_PRORVQ128,
29672 IX86_BUILTIN_PRORQ256,
29673 IX86_BUILTIN_PRORQ128,
29674 IX86_BUILTIN_PSRAVQ128,
29675 IX86_BUILTIN_PSRAVQ256,
29676 IX86_BUILTIN_PSLLVV4DI_MASK,
29677 IX86_BUILTIN_PSLLVV2DI_MASK,
29678 IX86_BUILTIN_PSLLVV8SI_MASK,
29679 IX86_BUILTIN_PSLLVV4SI_MASK,
29680 IX86_BUILTIN_PSRAVV8SI_MASK,
29681 IX86_BUILTIN_PSRAVV4SI_MASK,
29682 IX86_BUILTIN_PSRLVV4DI_MASK,
29683 IX86_BUILTIN_PSRLVV2DI_MASK,
29684 IX86_BUILTIN_PSRLVV8SI_MASK,
29685 IX86_BUILTIN_PSRLVV4SI_MASK,
29686 IX86_BUILTIN_PSRAWI256_MASK,
29687 IX86_BUILTIN_PSRAW256_MASK,
29688 IX86_BUILTIN_PSRAWI128_MASK,
29689 IX86_BUILTIN_PSRAW128_MASK,
29690 IX86_BUILTIN_PSRLWI256_MASK,
29691 IX86_BUILTIN_PSRLW256_MASK,
29692 IX86_BUILTIN_PSRLWI128_MASK,
29693 IX86_BUILTIN_PSRLW128_MASK,
29694 IX86_BUILTIN_PRORVD256,
29695 IX86_BUILTIN_PROLVD256,
29696 IX86_BUILTIN_PRORD256,
29697 IX86_BUILTIN_PROLD256,
29698 IX86_BUILTIN_PRORVD128,
29699 IX86_BUILTIN_PROLVD128,
29700 IX86_BUILTIN_PRORD128,
29701 IX86_BUILTIN_PROLD128,
29702 IX86_BUILTIN_FPCLASSPD256,
29703 IX86_BUILTIN_FPCLASSPD128,
29704 IX86_BUILTIN_FPCLASSSD,
29705 IX86_BUILTIN_FPCLASSPS256,
29706 IX86_BUILTIN_FPCLASSPS128,
29707 IX86_BUILTIN_FPCLASSSS,
29708 IX86_BUILTIN_CVTB2MASK128,
29709 IX86_BUILTIN_CVTB2MASK256,
29710 IX86_BUILTIN_CVTW2MASK128,
29711 IX86_BUILTIN_CVTW2MASK256,
29712 IX86_BUILTIN_CVTD2MASK128,
29713 IX86_BUILTIN_CVTD2MASK256,
29714 IX86_BUILTIN_CVTQ2MASK128,
29715 IX86_BUILTIN_CVTQ2MASK256,
29716 IX86_BUILTIN_CVTMASK2B128,
29717 IX86_BUILTIN_CVTMASK2B256,
29718 IX86_BUILTIN_CVTMASK2W128,
29719 IX86_BUILTIN_CVTMASK2W256,
29720 IX86_BUILTIN_CVTMASK2D128,
29721 IX86_BUILTIN_CVTMASK2D256,
29722 IX86_BUILTIN_CVTMASK2Q128,
29723 IX86_BUILTIN_CVTMASK2Q256,
29724 IX86_BUILTIN_PCMPEQB128_MASK,
29725 IX86_BUILTIN_PCMPEQB256_MASK,
29726 IX86_BUILTIN_PCMPEQW128_MASK,
29727 IX86_BUILTIN_PCMPEQW256_MASK,
29728 IX86_BUILTIN_PCMPEQD128_MASK,
29729 IX86_BUILTIN_PCMPEQD256_MASK,
29730 IX86_BUILTIN_PCMPEQQ128_MASK,
29731 IX86_BUILTIN_PCMPEQQ256_MASK,
29732 IX86_BUILTIN_PCMPGTB128_MASK,
29733 IX86_BUILTIN_PCMPGTB256_MASK,
29734 IX86_BUILTIN_PCMPGTW128_MASK,
29735 IX86_BUILTIN_PCMPGTW256_MASK,
29736 IX86_BUILTIN_PCMPGTD128_MASK,
29737 IX86_BUILTIN_PCMPGTD256_MASK,
29738 IX86_BUILTIN_PCMPGTQ128_MASK,
29739 IX86_BUILTIN_PCMPGTQ256_MASK,
29740 IX86_BUILTIN_PTESTMB128,
29741 IX86_BUILTIN_PTESTMB256,
29742 IX86_BUILTIN_PTESTMW128,
29743 IX86_BUILTIN_PTESTMW256,
29744 IX86_BUILTIN_PTESTMD128,
29745 IX86_BUILTIN_PTESTMD256,
29746 IX86_BUILTIN_PTESTMQ128,
29747 IX86_BUILTIN_PTESTMQ256,
29748 IX86_BUILTIN_PTESTNMB128,
29749 IX86_BUILTIN_PTESTNMB256,
29750 IX86_BUILTIN_PTESTNMW128,
29751 IX86_BUILTIN_PTESTNMW256,
29752 IX86_BUILTIN_PTESTNMD128,
29753 IX86_BUILTIN_PTESTNMD256,
29754 IX86_BUILTIN_PTESTNMQ128,
29755 IX86_BUILTIN_PTESTNMQ256,
29756 IX86_BUILTIN_PBROADCASTMB128,
29757 IX86_BUILTIN_PBROADCASTMB256,
29758 IX86_BUILTIN_PBROADCASTMW128,
29759 IX86_BUILTIN_PBROADCASTMW256,
29760 IX86_BUILTIN_COMPRESSPD256,
29761 IX86_BUILTIN_COMPRESSPD128,
29762 IX86_BUILTIN_COMPRESSPS256,
29763 IX86_BUILTIN_COMPRESSPS128,
29764 IX86_BUILTIN_PCOMPRESSQ256,
29765 IX86_BUILTIN_PCOMPRESSQ128,
29766 IX86_BUILTIN_PCOMPRESSD256,
29767 IX86_BUILTIN_PCOMPRESSD128,
29768 IX86_BUILTIN_EXPANDPD256,
29769 IX86_BUILTIN_EXPANDPD128,
29770 IX86_BUILTIN_EXPANDPS256,
29771 IX86_BUILTIN_EXPANDPS128,
29772 IX86_BUILTIN_PEXPANDQ256,
29773 IX86_BUILTIN_PEXPANDQ128,
29774 IX86_BUILTIN_PEXPANDD256,
29775 IX86_BUILTIN_PEXPANDD128,
29776 IX86_BUILTIN_EXPANDPD256Z,
29777 IX86_BUILTIN_EXPANDPD128Z,
29778 IX86_BUILTIN_EXPANDPS256Z,
29779 IX86_BUILTIN_EXPANDPS128Z,
29780 IX86_BUILTIN_PEXPANDQ256Z,
29781 IX86_BUILTIN_PEXPANDQ128Z,
29782 IX86_BUILTIN_PEXPANDD256Z,
29783 IX86_BUILTIN_PEXPANDD128Z,
29784 IX86_BUILTIN_PMAXSD256_MASK,
29785 IX86_BUILTIN_PMINSD256_MASK,
29786 IX86_BUILTIN_PMAXUD256_MASK,
29787 IX86_BUILTIN_PMINUD256_MASK,
29788 IX86_BUILTIN_PMAXSD128_MASK,
29789 IX86_BUILTIN_PMINSD128_MASK,
29790 IX86_BUILTIN_PMAXUD128_MASK,
29791 IX86_BUILTIN_PMINUD128_MASK,
29792 IX86_BUILTIN_PMAXSQ256_MASK,
29793 IX86_BUILTIN_PMINSQ256_MASK,
29794 IX86_BUILTIN_PMAXUQ256_MASK,
29795 IX86_BUILTIN_PMINUQ256_MASK,
29796 IX86_BUILTIN_PMAXSQ128_MASK,
29797 IX86_BUILTIN_PMINSQ128_MASK,
29798 IX86_BUILTIN_PMAXUQ128_MASK,
29799 IX86_BUILTIN_PMINUQ128_MASK,
29800 IX86_BUILTIN_PMINSB256_MASK,
29801 IX86_BUILTIN_PMINUB256_MASK,
29802 IX86_BUILTIN_PMAXSB256_MASK,
29803 IX86_BUILTIN_PMAXUB256_MASK,
29804 IX86_BUILTIN_PMINSB128_MASK,
29805 IX86_BUILTIN_PMINUB128_MASK,
29806 IX86_BUILTIN_PMAXSB128_MASK,
29807 IX86_BUILTIN_PMAXUB128_MASK,
29808 IX86_BUILTIN_PMINSW256_MASK,
29809 IX86_BUILTIN_PMINUW256_MASK,
29810 IX86_BUILTIN_PMAXSW256_MASK,
29811 IX86_BUILTIN_PMAXUW256_MASK,
29812 IX86_BUILTIN_PMINSW128_MASK,
29813 IX86_BUILTIN_PMINUW128_MASK,
29814 IX86_BUILTIN_PMAXSW128_MASK,
29815 IX86_BUILTIN_PMAXUW128_MASK,
29816 IX86_BUILTIN_VPCONFLICTQ256,
29817 IX86_BUILTIN_VPCONFLICTD256,
29818 IX86_BUILTIN_VPCLZCNTQ256,
29819 IX86_BUILTIN_VPCLZCNTD256,
29820 IX86_BUILTIN_UNPCKHPD256_MASK,
29821 IX86_BUILTIN_UNPCKHPD128_MASK,
29822 IX86_BUILTIN_UNPCKHPS256_MASK,
29823 IX86_BUILTIN_UNPCKHPS128_MASK,
29824 IX86_BUILTIN_UNPCKLPD256_MASK,
29825 IX86_BUILTIN_UNPCKLPD128_MASK,
29826 IX86_BUILTIN_UNPCKLPS256_MASK,
29827 IX86_BUILTIN_VPCONFLICTQ128,
29828 IX86_BUILTIN_VPCONFLICTD128,
29829 IX86_BUILTIN_VPCLZCNTQ128,
29830 IX86_BUILTIN_VPCLZCNTD128,
29831 IX86_BUILTIN_UNPCKLPS128_MASK,
29832 IX86_BUILTIN_ALIGND256,
29833 IX86_BUILTIN_ALIGNQ256,
29834 IX86_BUILTIN_ALIGND128,
29835 IX86_BUILTIN_ALIGNQ128,
29836 IX86_BUILTIN_CVTPS2PH256_MASK,
29837 IX86_BUILTIN_CVTPS2PH_MASK,
29838 IX86_BUILTIN_CVTPH2PS_MASK,
29839 IX86_BUILTIN_CVTPH2PS256_MASK,
29840 IX86_BUILTIN_PUNPCKHDQ128_MASK,
29841 IX86_BUILTIN_PUNPCKHDQ256_MASK,
29842 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
29843 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
29844 IX86_BUILTIN_PUNPCKLDQ128_MASK,
29845 IX86_BUILTIN_PUNPCKLDQ256_MASK,
29846 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
29847 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
29848 IX86_BUILTIN_PUNPCKHBW128_MASK,
29849 IX86_BUILTIN_PUNPCKHBW256_MASK,
29850 IX86_BUILTIN_PUNPCKHWD128_MASK,
29851 IX86_BUILTIN_PUNPCKHWD256_MASK,
29852 IX86_BUILTIN_PUNPCKLBW128_MASK,
29853 IX86_BUILTIN_PUNPCKLBW256_MASK,
29854 IX86_BUILTIN_PUNPCKLWD128_MASK,
29855 IX86_BUILTIN_PUNPCKLWD256_MASK,
29856 IX86_BUILTIN_PSLLVV16HI,
29857 IX86_BUILTIN_PSLLVV8HI,
29858 IX86_BUILTIN_PACKSSDW256_MASK,
29859 IX86_BUILTIN_PACKSSDW128_MASK,
29860 IX86_BUILTIN_PACKUSDW256_MASK,
29861 IX86_BUILTIN_PACKUSDW128_MASK,
29862 IX86_BUILTIN_PAVGB256_MASK,
29863 IX86_BUILTIN_PAVGW256_MASK,
29864 IX86_BUILTIN_PAVGB128_MASK,
29865 IX86_BUILTIN_PAVGW128_MASK,
29866 IX86_BUILTIN_VPERMVARSF256_MASK,
29867 IX86_BUILTIN_VPERMVARDF256_MASK,
29868 IX86_BUILTIN_VPERMDF256_MASK,
29869 IX86_BUILTIN_PABSB256_MASK,
29870 IX86_BUILTIN_PABSB128_MASK,
29871 IX86_BUILTIN_PABSW256_MASK,
29872 IX86_BUILTIN_PABSW128_MASK,
29873 IX86_BUILTIN_VPERMILVARPD_MASK,
29874 IX86_BUILTIN_VPERMILVARPS_MASK,
29875 IX86_BUILTIN_VPERMILVARPD256_MASK,
29876 IX86_BUILTIN_VPERMILVARPS256_MASK,
29877 IX86_BUILTIN_VPERMILPD_MASK,
29878 IX86_BUILTIN_VPERMILPS_MASK,
29879 IX86_BUILTIN_VPERMILPD256_MASK,
29880 IX86_BUILTIN_VPERMILPS256_MASK,
29881 IX86_BUILTIN_BLENDMQ256,
29882 IX86_BUILTIN_BLENDMD256,
29883 IX86_BUILTIN_BLENDMPD256,
29884 IX86_BUILTIN_BLENDMPS256,
29885 IX86_BUILTIN_BLENDMQ128,
29886 IX86_BUILTIN_BLENDMD128,
29887 IX86_BUILTIN_BLENDMPD128,
29888 IX86_BUILTIN_BLENDMPS128,
29889 IX86_BUILTIN_BLENDMW256,
29890 IX86_BUILTIN_BLENDMB256,
29891 IX86_BUILTIN_BLENDMW128,
29892 IX86_BUILTIN_BLENDMB128,
29893 IX86_BUILTIN_PMULLD256_MASK,
29894 IX86_BUILTIN_PMULLD128_MASK,
29895 IX86_BUILTIN_PMULUDQ256_MASK,
29896 IX86_BUILTIN_PMULDQ256_MASK,
29897 IX86_BUILTIN_PMULDQ128_MASK,
29898 IX86_BUILTIN_PMULUDQ128_MASK,
29899 IX86_BUILTIN_CVTPD2PS256_MASK,
29900 IX86_BUILTIN_CVTPD2PS_MASK,
29901 IX86_BUILTIN_VPERMVARSI256_MASK,
29902 IX86_BUILTIN_VPERMVARDI256_MASK,
29903 IX86_BUILTIN_VPERMDI256_MASK,
29904 IX86_BUILTIN_CMPQ256,
29905 IX86_BUILTIN_CMPD256,
29906 IX86_BUILTIN_UCMPQ256,
29907 IX86_BUILTIN_UCMPD256,
29908 IX86_BUILTIN_CMPB256,
29909 IX86_BUILTIN_CMPW256,
29910 IX86_BUILTIN_UCMPB256,
29911 IX86_BUILTIN_UCMPW256,
29912 IX86_BUILTIN_CMPPD256_MASK,
29913 IX86_BUILTIN_CMPPS256_MASK,
29914 IX86_BUILTIN_CMPQ128,
29915 IX86_BUILTIN_CMPD128,
29916 IX86_BUILTIN_UCMPQ128,
29917 IX86_BUILTIN_UCMPD128,
29918 IX86_BUILTIN_CMPB128,
29919 IX86_BUILTIN_CMPW128,
29920 IX86_BUILTIN_UCMPB128,
29921 IX86_BUILTIN_UCMPW128,
29922 IX86_BUILTIN_CMPPD128_MASK,
29923 IX86_BUILTIN_CMPPS128_MASK,
29925 IX86_BUILTIN_GATHER3SIV8SF,
29926 IX86_BUILTIN_GATHER3SIV4SF,
29927 IX86_BUILTIN_GATHER3SIV4DF,
29928 IX86_BUILTIN_GATHER3SIV2DF,
29929 IX86_BUILTIN_GATHER3DIV8SF,
29930 IX86_BUILTIN_GATHER3DIV4SF,
29931 IX86_BUILTIN_GATHER3DIV4DF,
29932 IX86_BUILTIN_GATHER3DIV2DF,
29933 IX86_BUILTIN_GATHER3SIV8SI,
29934 IX86_BUILTIN_GATHER3SIV4SI,
29935 IX86_BUILTIN_GATHER3SIV4DI,
29936 IX86_BUILTIN_GATHER3SIV2DI,
29937 IX86_BUILTIN_GATHER3DIV8SI,
29938 IX86_BUILTIN_GATHER3DIV4SI,
29939 IX86_BUILTIN_GATHER3DIV4DI,
29940 IX86_BUILTIN_GATHER3DIV2DI,
29941 IX86_BUILTIN_SCATTERSIV8SF,
29942 IX86_BUILTIN_SCATTERSIV4SF,
29943 IX86_BUILTIN_SCATTERSIV4DF,
29944 IX86_BUILTIN_SCATTERSIV2DF,
29945 IX86_BUILTIN_SCATTERDIV8SF,
29946 IX86_BUILTIN_SCATTERDIV4SF,
29947 IX86_BUILTIN_SCATTERDIV4DF,
29948 IX86_BUILTIN_SCATTERDIV2DF,
29949 IX86_BUILTIN_SCATTERSIV8SI,
29950 IX86_BUILTIN_SCATTERSIV4SI,
29951 IX86_BUILTIN_SCATTERSIV4DI,
29952 IX86_BUILTIN_SCATTERSIV2DI,
29953 IX86_BUILTIN_SCATTERDIV8SI,
29954 IX86_BUILTIN_SCATTERDIV4SI,
29955 IX86_BUILTIN_SCATTERDIV4DI,
29956 IX86_BUILTIN_SCATTERDIV2DI,
29958 /* AVX512DQ. */
29959 IX86_BUILTIN_RANGESD128,
29960 IX86_BUILTIN_RANGESS128,
29961 IX86_BUILTIN_KUNPCKWD,
29962 IX86_BUILTIN_KUNPCKDQ,
29963 IX86_BUILTIN_BROADCASTF32x2_512,
29964 IX86_BUILTIN_BROADCASTI32x2_512,
29965 IX86_BUILTIN_BROADCASTF64X2_512,
29966 IX86_BUILTIN_BROADCASTI64X2_512,
29967 IX86_BUILTIN_BROADCASTF32X8_512,
29968 IX86_BUILTIN_BROADCASTI32X8_512,
29969 IX86_BUILTIN_EXTRACTF64X2_512,
29970 IX86_BUILTIN_EXTRACTF32X8,
29971 IX86_BUILTIN_EXTRACTI64X2_512,
29972 IX86_BUILTIN_EXTRACTI32X8,
29973 IX86_BUILTIN_REDUCEPD512_MASK,
29974 IX86_BUILTIN_REDUCEPS512_MASK,
29975 IX86_BUILTIN_PMULLQ512,
29976 IX86_BUILTIN_XORPD512,
29977 IX86_BUILTIN_XORPS512,
29978 IX86_BUILTIN_ORPD512,
29979 IX86_BUILTIN_ORPS512,
29980 IX86_BUILTIN_ANDPD512,
29981 IX86_BUILTIN_ANDPS512,
29982 IX86_BUILTIN_ANDNPD512,
29983 IX86_BUILTIN_ANDNPS512,
29984 IX86_BUILTIN_INSERTF32X8,
29985 IX86_BUILTIN_INSERTI32X8,
29986 IX86_BUILTIN_INSERTF64X2_512,
29987 IX86_BUILTIN_INSERTI64X2_512,
29988 IX86_BUILTIN_FPCLASSPD512,
29989 IX86_BUILTIN_FPCLASSPS512,
29990 IX86_BUILTIN_CVTD2MASK512,
29991 IX86_BUILTIN_CVTQ2MASK512,
29992 IX86_BUILTIN_CVTMASK2D512,
29993 IX86_BUILTIN_CVTMASK2Q512,
29994 IX86_BUILTIN_CVTPD2QQ512,
29995 IX86_BUILTIN_CVTPS2QQ512,
29996 IX86_BUILTIN_CVTPD2UQQ512,
29997 IX86_BUILTIN_CVTPS2UQQ512,
29998 IX86_BUILTIN_CVTQQ2PS512,
29999 IX86_BUILTIN_CVTUQQ2PS512,
30000 IX86_BUILTIN_CVTQQ2PD512,
30001 IX86_BUILTIN_CVTUQQ2PD512,
30002 IX86_BUILTIN_CVTTPS2QQ512,
30003 IX86_BUILTIN_CVTTPS2UQQ512,
30004 IX86_BUILTIN_CVTTPD2QQ512,
30005 IX86_BUILTIN_CVTTPD2UQQ512,
30006 IX86_BUILTIN_RANGEPS512,
30007 IX86_BUILTIN_RANGEPD512,
30009 /* AVX512BW. */
30010 IX86_BUILTIN_PACKUSDW512,
30011 IX86_BUILTIN_PACKSSDW512,
30012 IX86_BUILTIN_LOADDQUHI512_MASK,
30013 IX86_BUILTIN_LOADDQUQI512_MASK,
30014 IX86_BUILTIN_PSLLDQ512,
30015 IX86_BUILTIN_PSRLDQ512,
30016 IX86_BUILTIN_STOREDQUHI512_MASK,
30017 IX86_BUILTIN_STOREDQUQI512_MASK,
30018 IX86_BUILTIN_PALIGNR512,
30019 IX86_BUILTIN_PALIGNR512_MASK,
30020 IX86_BUILTIN_MOVDQUHI512_MASK,
30021 IX86_BUILTIN_MOVDQUQI512_MASK,
30022 IX86_BUILTIN_PSADBW512,
30023 IX86_BUILTIN_DBPSADBW512,
30024 IX86_BUILTIN_PBROADCASTB512,
30025 IX86_BUILTIN_PBROADCASTB512_GPR,
30026 IX86_BUILTIN_PBROADCASTW512,
30027 IX86_BUILTIN_PBROADCASTW512_GPR,
30028 IX86_BUILTIN_PMOVSXBW512_MASK,
30029 IX86_BUILTIN_PMOVZXBW512_MASK,
30030 IX86_BUILTIN_VPERMVARHI512_MASK,
30031 IX86_BUILTIN_VPERMT2VARHI512,
30032 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30033 IX86_BUILTIN_VPERMI2VARHI512,
30034 IX86_BUILTIN_PAVGB512,
30035 IX86_BUILTIN_PAVGW512,
30036 IX86_BUILTIN_PADDB512,
30037 IX86_BUILTIN_PSUBB512,
30038 IX86_BUILTIN_PSUBSB512,
30039 IX86_BUILTIN_PADDSB512,
30040 IX86_BUILTIN_PSUBUSB512,
30041 IX86_BUILTIN_PADDUSB512,
30042 IX86_BUILTIN_PSUBW512,
30043 IX86_BUILTIN_PADDW512,
30044 IX86_BUILTIN_PSUBSW512,
30045 IX86_BUILTIN_PADDSW512,
30046 IX86_BUILTIN_PSUBUSW512,
30047 IX86_BUILTIN_PADDUSW512,
30048 IX86_BUILTIN_PMAXUW512,
30049 IX86_BUILTIN_PMAXSW512,
30050 IX86_BUILTIN_PMINUW512,
30051 IX86_BUILTIN_PMINSW512,
30052 IX86_BUILTIN_PMAXUB512,
30053 IX86_BUILTIN_PMAXSB512,
30054 IX86_BUILTIN_PMINUB512,
30055 IX86_BUILTIN_PMINSB512,
30056 IX86_BUILTIN_PMOVWB512,
30057 IX86_BUILTIN_PMOVSWB512,
30058 IX86_BUILTIN_PMOVUSWB512,
30059 IX86_BUILTIN_PMULHRSW512_MASK,
30060 IX86_BUILTIN_PMULHUW512_MASK,
30061 IX86_BUILTIN_PMULHW512_MASK,
30062 IX86_BUILTIN_PMULLW512_MASK,
30063 IX86_BUILTIN_PSLLWI512_MASK,
30064 IX86_BUILTIN_PSLLW512_MASK,
30065 IX86_BUILTIN_PACKSSWB512,
30066 IX86_BUILTIN_PACKUSWB512,
30067 IX86_BUILTIN_PSRAVV32HI,
30068 IX86_BUILTIN_PMADDUBSW512_MASK,
30069 IX86_BUILTIN_PMADDWD512_MASK,
30070 IX86_BUILTIN_PSRLVV32HI,
30071 IX86_BUILTIN_PUNPCKHBW512,
30072 IX86_BUILTIN_PUNPCKHWD512,
30073 IX86_BUILTIN_PUNPCKLBW512,
30074 IX86_BUILTIN_PUNPCKLWD512,
30075 IX86_BUILTIN_PSHUFB512,
30076 IX86_BUILTIN_PSHUFHW512,
30077 IX86_BUILTIN_PSHUFLW512,
30078 IX86_BUILTIN_PSRAWI512,
30079 IX86_BUILTIN_PSRAW512,
30080 IX86_BUILTIN_PSRLWI512,
30081 IX86_BUILTIN_PSRLW512,
30082 IX86_BUILTIN_CVTB2MASK512,
30083 IX86_BUILTIN_CVTW2MASK512,
30084 IX86_BUILTIN_CVTMASK2B512,
30085 IX86_BUILTIN_CVTMASK2W512,
30086 IX86_BUILTIN_PCMPEQB512_MASK,
30087 IX86_BUILTIN_PCMPEQW512_MASK,
30088 IX86_BUILTIN_PCMPGTB512_MASK,
30089 IX86_BUILTIN_PCMPGTW512_MASK,
30090 IX86_BUILTIN_PTESTMB512,
30091 IX86_BUILTIN_PTESTMW512,
30092 IX86_BUILTIN_PTESTNMB512,
30093 IX86_BUILTIN_PTESTNMW512,
30094 IX86_BUILTIN_PSLLVV32HI,
30095 IX86_BUILTIN_PABSB512,
30096 IX86_BUILTIN_PABSW512,
30097 IX86_BUILTIN_BLENDMW512,
30098 IX86_BUILTIN_BLENDMB512,
30099 IX86_BUILTIN_CMPB512,
30100 IX86_BUILTIN_CMPW512,
30101 IX86_BUILTIN_UCMPB512,
30102 IX86_BUILTIN_UCMPW512,
30104 /* Alternate 4 and 8 element gather/scatter for the vectorizer
30105 where all operands are 32-byte or 64-byte wide respectively. */
30106 IX86_BUILTIN_GATHERALTSIV4DF,
30107 IX86_BUILTIN_GATHERALTDIV8SF,
30108 IX86_BUILTIN_GATHERALTSIV4DI,
30109 IX86_BUILTIN_GATHERALTDIV8SI,
30110 IX86_BUILTIN_GATHER3ALTDIV16SF,
30111 IX86_BUILTIN_GATHER3ALTDIV16SI,
30112 IX86_BUILTIN_GATHER3ALTSIV4DF,
30113 IX86_BUILTIN_GATHER3ALTDIV8SF,
30114 IX86_BUILTIN_GATHER3ALTSIV4DI,
30115 IX86_BUILTIN_GATHER3ALTDIV8SI,
30116 IX86_BUILTIN_GATHER3ALTSIV8DF,
30117 IX86_BUILTIN_GATHER3ALTSIV8DI,
30118 IX86_BUILTIN_GATHER3DIV16SF,
30119 IX86_BUILTIN_GATHER3DIV16SI,
30120 IX86_BUILTIN_GATHER3DIV8DF,
30121 IX86_BUILTIN_GATHER3DIV8DI,
30122 IX86_BUILTIN_GATHER3SIV16SF,
30123 IX86_BUILTIN_GATHER3SIV16SI,
30124 IX86_BUILTIN_GATHER3SIV8DF,
30125 IX86_BUILTIN_GATHER3SIV8DI,
30126 IX86_BUILTIN_SCATTERDIV16SF,
30127 IX86_BUILTIN_SCATTERDIV16SI,
30128 IX86_BUILTIN_SCATTERDIV8DF,
30129 IX86_BUILTIN_SCATTERDIV8DI,
30130 IX86_BUILTIN_SCATTERSIV16SF,
30131 IX86_BUILTIN_SCATTERSIV16SI,
30132 IX86_BUILTIN_SCATTERSIV8DF,
30133 IX86_BUILTIN_SCATTERSIV8DI,
30135 /* AVX512PF */
30136 IX86_BUILTIN_GATHERPFQPD,
30137 IX86_BUILTIN_GATHERPFDPS,
30138 IX86_BUILTIN_GATHERPFDPD,
30139 IX86_BUILTIN_GATHERPFQPS,
30140 IX86_BUILTIN_SCATTERPFDPD,
30141 IX86_BUILTIN_SCATTERPFDPS,
30142 IX86_BUILTIN_SCATTERPFQPD,
30143 IX86_BUILTIN_SCATTERPFQPS,
30145 /* AVX-512ER */
30146 IX86_BUILTIN_EXP2PD_MASK,
30147 IX86_BUILTIN_EXP2PS_MASK,
30148 IX86_BUILTIN_EXP2PS,
30149 IX86_BUILTIN_RCP28PD,
30150 IX86_BUILTIN_RCP28PS,
30151 IX86_BUILTIN_RCP28SD,
30152 IX86_BUILTIN_RCP28SS,
30153 IX86_BUILTIN_RSQRT28PD,
30154 IX86_BUILTIN_RSQRT28PS,
30155 IX86_BUILTIN_RSQRT28SD,
30156 IX86_BUILTIN_RSQRT28SS,
30158 /* AVX-512IFMA */
30159 IX86_BUILTIN_VPMADD52LUQ512,
30160 IX86_BUILTIN_VPMADD52HUQ512,
30161 IX86_BUILTIN_VPMADD52LUQ256,
30162 IX86_BUILTIN_VPMADD52HUQ256,
30163 IX86_BUILTIN_VPMADD52LUQ128,
30164 IX86_BUILTIN_VPMADD52HUQ128,
30165 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30166 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30167 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30168 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30169 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30170 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30172 /* AVX-512VBMI */
30173 IX86_BUILTIN_VPMULTISHIFTQB512,
30174 IX86_BUILTIN_VPMULTISHIFTQB256,
30175 IX86_BUILTIN_VPMULTISHIFTQB128,
30176 IX86_BUILTIN_VPERMVARQI512_MASK,
30177 IX86_BUILTIN_VPERMT2VARQI512,
30178 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30179 IX86_BUILTIN_VPERMI2VARQI512,
30180 IX86_BUILTIN_VPERMVARQI256_MASK,
30181 IX86_BUILTIN_VPERMVARQI128_MASK,
30182 IX86_BUILTIN_VPERMT2VARQI256,
30183 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30184 IX86_BUILTIN_VPERMT2VARQI128,
30185 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30186 IX86_BUILTIN_VPERMI2VARQI256,
30187 IX86_BUILTIN_VPERMI2VARQI128,
30189 /* SHA builtins. */
30190 IX86_BUILTIN_SHA1MSG1,
30191 IX86_BUILTIN_SHA1MSG2,
30192 IX86_BUILTIN_SHA1NEXTE,
30193 IX86_BUILTIN_SHA1RNDS4,
30194 IX86_BUILTIN_SHA256MSG1,
30195 IX86_BUILTIN_SHA256MSG2,
30196 IX86_BUILTIN_SHA256RNDS2,
30198 /* CLWB instructions. */
30199 IX86_BUILTIN_CLWB,
30201 /* PCOMMIT instructions. */
30202 IX86_BUILTIN_PCOMMIT,
30204 /* CLFLUSHOPT instructions. */
30205 IX86_BUILTIN_CLFLUSHOPT,
30207 /* TFmode support builtins. */
30208 IX86_BUILTIN_INFQ,
30209 IX86_BUILTIN_HUGE_VALQ,
30210 IX86_BUILTIN_FABSQ,
30211 IX86_BUILTIN_COPYSIGNQ,
30213 /* Vectorizer support builtins. */
30214 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30215 IX86_BUILTIN_CPYSGNPS,
30216 IX86_BUILTIN_CPYSGNPD,
30217 IX86_BUILTIN_CPYSGNPS256,
30218 IX86_BUILTIN_CPYSGNPS512,
30219 IX86_BUILTIN_CPYSGNPD256,
30220 IX86_BUILTIN_CPYSGNPD512,
30221 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30222 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30225 /* FMA4 instructions. */
30226 IX86_BUILTIN_VFMADDSS,
30227 IX86_BUILTIN_VFMADDSD,
30228 IX86_BUILTIN_VFMADDPS,
30229 IX86_BUILTIN_VFMADDPD,
30230 IX86_BUILTIN_VFMADDPS256,
30231 IX86_BUILTIN_VFMADDPD256,
30232 IX86_BUILTIN_VFMADDSUBPS,
30233 IX86_BUILTIN_VFMADDSUBPD,
30234 IX86_BUILTIN_VFMADDSUBPS256,
30235 IX86_BUILTIN_VFMADDSUBPD256,
30237 /* FMA3 instructions. */
30238 IX86_BUILTIN_VFMADDSS3,
30239 IX86_BUILTIN_VFMADDSD3,
30241 /* XOP instructions. */
30242 IX86_BUILTIN_VPCMOV,
30243 IX86_BUILTIN_VPCMOV_V2DI,
30244 IX86_BUILTIN_VPCMOV_V4SI,
30245 IX86_BUILTIN_VPCMOV_V8HI,
30246 IX86_BUILTIN_VPCMOV_V16QI,
30247 IX86_BUILTIN_VPCMOV_V4SF,
30248 IX86_BUILTIN_VPCMOV_V2DF,
30249 IX86_BUILTIN_VPCMOV256,
30250 IX86_BUILTIN_VPCMOV_V4DI256,
30251 IX86_BUILTIN_VPCMOV_V8SI256,
30252 IX86_BUILTIN_VPCMOV_V16HI256,
30253 IX86_BUILTIN_VPCMOV_V32QI256,
30254 IX86_BUILTIN_VPCMOV_V8SF256,
30255 IX86_BUILTIN_VPCMOV_V4DF256,
30257 IX86_BUILTIN_VPPERM,
30259 IX86_BUILTIN_VPMACSSWW,
30260 IX86_BUILTIN_VPMACSWW,
30261 IX86_BUILTIN_VPMACSSWD,
30262 IX86_BUILTIN_VPMACSWD,
30263 IX86_BUILTIN_VPMACSSDD,
30264 IX86_BUILTIN_VPMACSDD,
30265 IX86_BUILTIN_VPMACSSDQL,
30266 IX86_BUILTIN_VPMACSSDQH,
30267 IX86_BUILTIN_VPMACSDQL,
30268 IX86_BUILTIN_VPMACSDQH,
30269 IX86_BUILTIN_VPMADCSSWD,
30270 IX86_BUILTIN_VPMADCSWD,
30272 IX86_BUILTIN_VPHADDBW,
30273 IX86_BUILTIN_VPHADDBD,
30274 IX86_BUILTIN_VPHADDBQ,
30275 IX86_BUILTIN_VPHADDWD,
30276 IX86_BUILTIN_VPHADDWQ,
30277 IX86_BUILTIN_VPHADDDQ,
30278 IX86_BUILTIN_VPHADDUBW,
30279 IX86_BUILTIN_VPHADDUBD,
30280 IX86_BUILTIN_VPHADDUBQ,
30281 IX86_BUILTIN_VPHADDUWD,
30282 IX86_BUILTIN_VPHADDUWQ,
30283 IX86_BUILTIN_VPHADDUDQ,
30284 IX86_BUILTIN_VPHSUBBW,
30285 IX86_BUILTIN_VPHSUBWD,
30286 IX86_BUILTIN_VPHSUBDQ,
30288 IX86_BUILTIN_VPROTB,
30289 IX86_BUILTIN_VPROTW,
30290 IX86_BUILTIN_VPROTD,
30291 IX86_BUILTIN_VPROTQ,
30292 IX86_BUILTIN_VPROTB_IMM,
30293 IX86_BUILTIN_VPROTW_IMM,
30294 IX86_BUILTIN_VPROTD_IMM,
30295 IX86_BUILTIN_VPROTQ_IMM,
30297 IX86_BUILTIN_VPSHLB,
30298 IX86_BUILTIN_VPSHLW,
30299 IX86_BUILTIN_VPSHLD,
30300 IX86_BUILTIN_VPSHLQ,
30301 IX86_BUILTIN_VPSHAB,
30302 IX86_BUILTIN_VPSHAW,
30303 IX86_BUILTIN_VPSHAD,
30304 IX86_BUILTIN_VPSHAQ,
30306 IX86_BUILTIN_VFRCZSS,
30307 IX86_BUILTIN_VFRCZSD,
30308 IX86_BUILTIN_VFRCZPS,
30309 IX86_BUILTIN_VFRCZPD,
30310 IX86_BUILTIN_VFRCZPS256,
30311 IX86_BUILTIN_VFRCZPD256,
30313 IX86_BUILTIN_VPCOMEQUB,
30314 IX86_BUILTIN_VPCOMNEUB,
30315 IX86_BUILTIN_VPCOMLTUB,
30316 IX86_BUILTIN_VPCOMLEUB,
30317 IX86_BUILTIN_VPCOMGTUB,
30318 IX86_BUILTIN_VPCOMGEUB,
30319 IX86_BUILTIN_VPCOMFALSEUB,
30320 IX86_BUILTIN_VPCOMTRUEUB,
30322 IX86_BUILTIN_VPCOMEQUW,
30323 IX86_BUILTIN_VPCOMNEUW,
30324 IX86_BUILTIN_VPCOMLTUW,
30325 IX86_BUILTIN_VPCOMLEUW,
30326 IX86_BUILTIN_VPCOMGTUW,
30327 IX86_BUILTIN_VPCOMGEUW,
30328 IX86_BUILTIN_VPCOMFALSEUW,
30329 IX86_BUILTIN_VPCOMTRUEUW,
30331 IX86_BUILTIN_VPCOMEQUD,
30332 IX86_BUILTIN_VPCOMNEUD,
30333 IX86_BUILTIN_VPCOMLTUD,
30334 IX86_BUILTIN_VPCOMLEUD,
30335 IX86_BUILTIN_VPCOMGTUD,
30336 IX86_BUILTIN_VPCOMGEUD,
30337 IX86_BUILTIN_VPCOMFALSEUD,
30338 IX86_BUILTIN_VPCOMTRUEUD,
30340 IX86_BUILTIN_VPCOMEQUQ,
30341 IX86_BUILTIN_VPCOMNEUQ,
30342 IX86_BUILTIN_VPCOMLTUQ,
30343 IX86_BUILTIN_VPCOMLEUQ,
30344 IX86_BUILTIN_VPCOMGTUQ,
30345 IX86_BUILTIN_VPCOMGEUQ,
30346 IX86_BUILTIN_VPCOMFALSEUQ,
30347 IX86_BUILTIN_VPCOMTRUEUQ,
30349 IX86_BUILTIN_VPCOMEQB,
30350 IX86_BUILTIN_VPCOMNEB,
30351 IX86_BUILTIN_VPCOMLTB,
30352 IX86_BUILTIN_VPCOMLEB,
30353 IX86_BUILTIN_VPCOMGTB,
30354 IX86_BUILTIN_VPCOMGEB,
30355 IX86_BUILTIN_VPCOMFALSEB,
30356 IX86_BUILTIN_VPCOMTRUEB,
30358 IX86_BUILTIN_VPCOMEQW,
30359 IX86_BUILTIN_VPCOMNEW,
30360 IX86_BUILTIN_VPCOMLTW,
30361 IX86_BUILTIN_VPCOMLEW,
30362 IX86_BUILTIN_VPCOMGTW,
30363 IX86_BUILTIN_VPCOMGEW,
30364 IX86_BUILTIN_VPCOMFALSEW,
30365 IX86_BUILTIN_VPCOMTRUEW,
30367 IX86_BUILTIN_VPCOMEQD,
30368 IX86_BUILTIN_VPCOMNED,
30369 IX86_BUILTIN_VPCOMLTD,
30370 IX86_BUILTIN_VPCOMLED,
30371 IX86_BUILTIN_VPCOMGTD,
30372 IX86_BUILTIN_VPCOMGED,
30373 IX86_BUILTIN_VPCOMFALSED,
30374 IX86_BUILTIN_VPCOMTRUED,
30376 IX86_BUILTIN_VPCOMEQQ,
30377 IX86_BUILTIN_VPCOMNEQ,
30378 IX86_BUILTIN_VPCOMLTQ,
30379 IX86_BUILTIN_VPCOMLEQ,
30380 IX86_BUILTIN_VPCOMGTQ,
30381 IX86_BUILTIN_VPCOMGEQ,
30382 IX86_BUILTIN_VPCOMFALSEQ,
30383 IX86_BUILTIN_VPCOMTRUEQ,
30385 /* LWP instructions. */
30386 IX86_BUILTIN_LLWPCB,
30387 IX86_BUILTIN_SLWPCB,
30388 IX86_BUILTIN_LWPVAL32,
30389 IX86_BUILTIN_LWPVAL64,
30390 IX86_BUILTIN_LWPINS32,
30391 IX86_BUILTIN_LWPINS64,
30393 IX86_BUILTIN_CLZS,
30395 /* RTM */
30396 IX86_BUILTIN_XBEGIN,
30397 IX86_BUILTIN_XEND,
30398 IX86_BUILTIN_XABORT,
30399 IX86_BUILTIN_XTEST,
30401 /* MPX */
30402 IX86_BUILTIN_BNDMK,
30403 IX86_BUILTIN_BNDSTX,
30404 IX86_BUILTIN_BNDLDX,
30405 IX86_BUILTIN_BNDCL,
30406 IX86_BUILTIN_BNDCU,
30407 IX86_BUILTIN_BNDRET,
30408 IX86_BUILTIN_BNDNARROW,
30409 IX86_BUILTIN_BNDINT,
30410 IX86_BUILTIN_SIZEOF,
30411 IX86_BUILTIN_BNDLOWER,
30412 IX86_BUILTIN_BNDUPPER,
30414 /* BMI instructions. */
30415 IX86_BUILTIN_BEXTR32,
30416 IX86_BUILTIN_BEXTR64,
30417 IX86_BUILTIN_CTZS,
30419 /* TBM instructions. */
30420 IX86_BUILTIN_BEXTRI32,
30421 IX86_BUILTIN_BEXTRI64,
30423 /* BMI2 instructions. */
30424 IX86_BUILTIN_BZHI32,
30425 IX86_BUILTIN_BZHI64,
30426 IX86_BUILTIN_PDEP32,
30427 IX86_BUILTIN_PDEP64,
30428 IX86_BUILTIN_PEXT32,
30429 IX86_BUILTIN_PEXT64,
30431 /* ADX instructions. */
30432 IX86_BUILTIN_ADDCARRYX32,
30433 IX86_BUILTIN_ADDCARRYX64,
30435 /* SBB instructions. */
30436 IX86_BUILTIN_SBB32,
30437 IX86_BUILTIN_SBB64,
30439 /* FSGSBASE instructions. */
30440 IX86_BUILTIN_RDFSBASE32,
30441 IX86_BUILTIN_RDFSBASE64,
30442 IX86_BUILTIN_RDGSBASE32,
30443 IX86_BUILTIN_RDGSBASE64,
30444 IX86_BUILTIN_WRFSBASE32,
30445 IX86_BUILTIN_WRFSBASE64,
30446 IX86_BUILTIN_WRGSBASE32,
30447 IX86_BUILTIN_WRGSBASE64,
30449 /* RDRND instructions. */
30450 IX86_BUILTIN_RDRAND16_STEP,
30451 IX86_BUILTIN_RDRAND32_STEP,
30452 IX86_BUILTIN_RDRAND64_STEP,
30454 /* RDSEED instructions. */
30455 IX86_BUILTIN_RDSEED16_STEP,
30456 IX86_BUILTIN_RDSEED32_STEP,
30457 IX86_BUILTIN_RDSEED64_STEP,
30459 /* F16C instructions. */
30460 IX86_BUILTIN_CVTPH2PS,
30461 IX86_BUILTIN_CVTPH2PS256,
30462 IX86_BUILTIN_CVTPS2PH,
30463 IX86_BUILTIN_CVTPS2PH256,
30465 /* CFString built-in for darwin */
30466 IX86_BUILTIN_CFSTRING,
30468 /* Builtins to get CPU type and supported features. */
30469 IX86_BUILTIN_CPU_INIT,
30470 IX86_BUILTIN_CPU_IS,
30471 IX86_BUILTIN_CPU_SUPPORTS,
30473 /* Read/write FLAGS register built-ins. */
30474 IX86_BUILTIN_READ_FLAGS,
30475 IX86_BUILTIN_WRITE_FLAGS,
30477 IX86_BUILTIN_MAX
30478 };
30480 /* Table for the ix86 builtin decls. */
30481 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30483 /* Table of all of the builtin functions that are possible with different
30484 ISAs but are waiting to be built until a function is declared to use
30485 that ISA. */
30486 struct builtin_isa {
30487 const char *name; /* function name */
30488 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30489 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30490 bool const_p; /* true if the declaration is constant */
30491 bool leaf_p; /* true if the declaration has leaf attribute */
30492 bool nothrow_p; /* true if the declaration has nothrow attribute */
30493 bool set_and_not_built_p; /* true if the declaration was deferred and has not been built yet */
30494 };
30496 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30498 /* Bits that can still enable inclusion of a deferred builtin. */
30499 static HOST_WIDE_INT deferred_isa_values = 0;
30501 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the
30502 MASK of isa_flags to use in the ix86_builtins_isa array. Store the
30503 function decl in the ix86_builtins array. Return the function decl, or
30504 NULL_TREE if the builtin was not added.
30506 If the front end has a special hook for builtin functions, delay adding
30507 builtin functions that aren't in the current ISA until the ISA is changed
30508 with function specific optimization. Doing so can save about 300K for the
30509 default compiler. When the builtin is expanded, check at that time whether
30510 it is valid.
30512 If the front end doesn't have a special hook, record all builtins, even
30513 those not in the current ISA, in case the user uses function specific
30514 options for a different ISA, so that we don't get scope errors if a
30515 builtin is added in the middle of a function scope. */
30517 static inline tree
30518 def_builtin (HOST_WIDE_INT mask, const char *name,
30519 enum ix86_builtin_func_type tcode,
30520 enum ix86_builtins code)
30521 {
30522 tree decl = NULL_TREE;
30524 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30525 {
30526 ix86_builtins_isa[(int) code].isa = mask;
30528 mask &= ~OPTION_MASK_ISA_64BIT;
30529 if (mask == 0
30530 || (mask & ix86_isa_flags) != 0
30531 || (lang_hooks.builtin_function
30532 == lang_hooks.builtin_function_ext_scope))
30534 {
30535 tree type = ix86_get_builtin_func_type (tcode);
30536 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30537 NULL, NULL_TREE);
30538 ix86_builtins[(int) code] = decl;
30539 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30540 }
30541 else
30542 {
30543 /* Just a MASK where set_and_not_built_p == true can potentially
30544 include a builtin. */
30545 deferred_isa_values |= mask;
30546 ix86_builtins[(int) code] = NULL_TREE;
30547 ix86_builtins_isa[(int) code].tcode = tcode;
30548 ix86_builtins_isa[(int) code].name = name;
30549 ix86_builtins_isa[(int) code].leaf_p = false;
30550 ix86_builtins_isa[(int) code].nothrow_p = false;
30551 ix86_builtins_isa[(int) code].const_p = false;
30552 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30553 }
30554 }
30556 return decl;
30557 }
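/* Illustrative sketch, not part of the original file: a hypothetical call to
   def_builtin.  The builtin name and the reuse of IX86_BUILTIN_MFENCE as a
   placeholder code are assumptions made purely for illustration; the real
   registrations happen later in ix86_init_mmx_sse_builtins and friends.
   Kept out of the build with #if 0.  */
#if 0
static void
example_def_builtin (void)
{
  /* With -msse2 in ix86_isa_flags (or a front end using the extended-scope
     hook) the decl is built immediately; otherwise the SSE2 bit is parked in
     deferred_isa_values and ix86_builtins_isa, and the decl is created later
     by ix86_add_new_builtins.  */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
	       VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE /* placeholder code */);
}
#endif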
30559 /* Like def_builtin, but also marks the function decl "const". */
30561 static inline tree
30562 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30563 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30564 {
30565 tree decl = def_builtin (mask, name, tcode, code);
30566 if (decl)
30567 TREE_READONLY (decl) = 1;
30568 else
30569 ix86_builtins_isa[(int) code].const_p = true;
30571 return decl;
30572 }
30574 /* Add any new builtin functions for a given ISA that may not have been
30575 declared. This saves a bit of space compared to adding all of the
30576 declarations to the tree, even if we didn't use them. */
30578 static void
30579 ix86_add_new_builtins (HOST_WIDE_INT isa)
30580 {
30581 if ((isa & deferred_isa_values) == 0)
30582 return;
30584 /* Bits in ISA value can be removed from potential isa values. */
30585 deferred_isa_values &= ~isa;
30587 int i;
30588 tree saved_current_target_pragma = current_target_pragma;
30589 current_target_pragma = NULL_TREE;
30591 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30592 {
30593 if ((ix86_builtins_isa[i].isa & isa) != 0
30594 && ix86_builtins_isa[i].set_and_not_built_p)
30595 {
30596 tree decl, type;
30598 /* Don't define the builtin again. */
30599 ix86_builtins_isa[i].set_and_not_built_p = false;
30601 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30602 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30603 type, i, BUILT_IN_MD, NULL,
30604 NULL_TREE);
30606 ix86_builtins[i] = decl;
30607 if (ix86_builtins_isa[i].const_p)
30608 TREE_READONLY (decl) = 1;
30609 if (ix86_builtins_isa[i].leaf_p)
30610 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30611 NULL_TREE);
30612 if (ix86_builtins_isa[i].nothrow_p)
30613 TREE_NOTHROW (decl) = 1;
30614 }
30615 }
30617 current_target_pragma = saved_current_target_pragma;
30618 }
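/* Illustrative sketch, not part of the original file: once additional ISA
   bits have been enabled (e.g. by a target attribute or pragma switching
   ix86_isa_flags), passing the new isa_flags here declares on demand any
   builtins that were deferred for those bits.  The wrapper below is
   hypothetical and kept out of the build with #if 0.  */
#if 0
static void
example_declare_deferred (HOST_WIDE_INT new_isa_flags)
{
  ix86_add_new_builtins (new_isa_flags);
}
#endif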
30620 /* Bits for builtin_description.flag. */
30622 /* Set when we don't support the comparison natively, and should
30623 swap_comparison in order to support it. */
30624 #define BUILTIN_DESC_SWAP_OPERANDS 1
30626 struct builtin_description
30627 {
30628 const HOST_WIDE_INT mask;
30629 const enum insn_code icode;
30630 const char *const name;
30631 const enum ix86_builtins code;
30632 const enum rtx_code comparison;
30633 const int flag;
30634 };
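/* Sketch of how BUILTIN_DESC_SWAP_OPERANDS is meant to be consumed; this is
   an illustrative stand-in, not the actual expander code.  When an insn only
   supports one direction of a comparison, the table entry sets the flag and
   the expander swaps the operands and reverses the rtx comparison code.
   Kept out of the build with #if 0.  */
#if 0
static void
example_maybe_swap_operands (const struct builtin_description *d,
			     rtx *op0, rtx *op1, enum rtx_code *comparison)
{
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      std::swap (*op0, *op1);
      *comparison = swap_condition (*comparison);
    }
}
#endif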
30636 static const struct builtin_description bdesc_comi[] =
30637 {
30638 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30639 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30640 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30641 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30642 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30643 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30644 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30645 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30646 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30647 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30648 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30649 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30650 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30651 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30652 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30653 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30654 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30655 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30656 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30657 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30658 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30659 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30660 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30661 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
30662 };
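/* User-level sketch, illustrative only and not part of this file: what the
   first bdesc_comi entry above ultimately provides to a program compiled
   with -msse.  The typedef and wrapper names are hypothetical; the exact
   comparison semantics follow the UNEQ/CODE_FOR_sse_comi pairing in the
   table.  Kept out of the build with #if 0.  */
#if 0
typedef float example_v4sf __attribute__ ((__vector_size__ (16)));
static int
example_comieq (example_v4sf a, example_v4sf b)
{
  /* Compares the low elements of the two vectors and returns an int.  */
  return __builtin_ia32_comieq (a, b);
}
#endif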
30664 static const struct builtin_description bdesc_pcmpestr[] =
30665 {
30666 /* SSE4.2 */
30667 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30668 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30669 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30670 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30671 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30672 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30673 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30674 };
30676 static const struct builtin_description bdesc_pcmpistr[] =
30677 {
30678 /* SSE4.2 */
30679 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30680 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30681 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30682 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30683 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30684 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30685 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30686 };
30688 /* Special builtins with variable number of arguments. */
30689 static const struct builtin_description bdesc_special_args[] =
30690 {
30691 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30692 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30693 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30695 /* 80387 (for use internally for atomic compound assignment). */
30696 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30697 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30698 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30699 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30701 /* MMX */
30702 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30704 /* 3DNow! */
30705 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30707 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30708 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30709 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30710 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30711 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30712 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30713 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30714 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30715 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30717 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30718 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30719 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30720 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30721 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30722 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30723 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30724 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30726 /* SSE */
30727 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30728 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30729 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30731 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30732 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30733 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30734 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30736 /* SSE or 3DNow!A */
30737 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30738 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30740 /* SSE2 */
30741 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30742 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30743 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30744 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30745 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30746 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30747 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30748 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30749 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30750 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30752 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30753 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30755 /* SSE3 */
30756 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30758 /* SSE4.1 */
30759 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30761 /* SSE4A */
30762 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30763 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30765 /* AVX */
30766 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30767 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30769 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30770 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30771 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30772 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30773 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30775 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30776 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30777 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30780 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30781 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30784 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30785 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30787 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
30788 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
30789 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
30790 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
30791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
30792 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
30793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
30794 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
30796 /* AVX2 */
30797 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
30798 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
30799 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
30800 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
30801 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
30802 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
30803 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
30804 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
30805 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
30807 /* AVX512F */
30808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30809 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
30829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
30830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
30831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
30832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30856 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
30857 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
30858 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
30859 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
30860 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
30861 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
30863 /* FSGSBASE */
30864 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30865 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30866 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30867 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30868 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30869 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30870 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30871 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30873 /* RTM */
30874 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30875 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
30876 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
30878 /* AVX512BW */
30879 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
30880 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
30881 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
30882 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
30884 /* AVX512VL */
30885 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
30886 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
30887 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
30888 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
30889 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30890 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30891 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30892 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30893 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30894 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30895 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30896 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30897 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30898 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30899 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30900 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30901 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30902 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30903 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30904 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30905 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30906 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30907 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30908 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30909 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30910 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30911 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30912 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30913 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30914 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30915 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30916 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30917 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30918 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30919 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30920 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30921 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
30922 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
30923 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
30924 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
30925 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30926 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30927 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30928 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30929 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30930 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30931 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30932 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30933 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30934 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30935 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30936 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30937 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30938 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30939 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30940 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30941 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30942 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30943 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30944 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30945 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30946 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30947 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30948 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30949 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30950 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30951 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30952 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30953 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30954 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30955 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30956 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30957 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30958 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30959 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30960 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30961 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30962 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30963 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30964 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30965 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30966 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30967 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30968 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30969 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30970 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30971 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30972 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30973 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30974 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30975 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30976 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30977 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30978 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30980 /* PCOMMIT. */
30981 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
30982 };
30984 /* Builtins with variable number of arguments. */
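/* Table layout note (following the builtin_description struct used
   throughout this file): each entry gives, in order, the ISA option mask
   required to enable the builtin, the insn code of the pattern that expands
   it, the user-visible builtin name, the IX86_BUILTIN_* enumerator, an
   optional comparison/rounding code (UNKNOWN when unused), and the function
   prototype enumerator cast to int.  */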
30985 static const struct builtin_description bdesc_args[] =
30986 {
30987 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
30988 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
30989 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
30990 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30991 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30992 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30993 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30995 /* MMX */
30996 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30997 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30998 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30999 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31000 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31001 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31003 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31004 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31005 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31006 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31007 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31008 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31009 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31010 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31012 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31013 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31015 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31016 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31017 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31018 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31020 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31021 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31022 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31023 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31024 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31025 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31027 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31028 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31029 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31030 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31031 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31032 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31034 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31035 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
31036 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31038 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
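/* As the entries below show, each shift builtin is listed twice: once with
   a plain SI count operand (the *_SI_COUNT prototypes, used by the
   immediate-count forms such as psllwi) and once with a vector count
   operand (e.g. V4HI_FTYPE_V4HI_V4HI_COUNT, used by psllw and friends);
   both forms map onto the same shift insn pattern.  */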
31040 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31041 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31042 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31043 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31044 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31045 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31047 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31048 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31049 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31050 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31051 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31052 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31054 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31055 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31056 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31057 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31059 /* 3DNow! */
31060 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31061 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31062 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31063 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31065 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31066 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31067 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31068 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31069 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31070 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31071 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31072 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31073 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31074 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31075 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31076 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31077 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31078 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31079 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31081 /* 3DNow!A */
31082 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31083 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31084 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31085 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31086 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31087 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31089 /* SSE */
31090 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31091 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31092 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31093 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31094 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31095 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31096 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31097 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31098 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31099 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31100 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31101 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31103 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31105 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31106 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31107 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31108 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31109 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31110 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31111 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31112 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
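/* SSE encodes only the "less than"-style compare predicates, so the
   greater-than variants below reuse the same maskcmp pattern with swapped
   operands: cmpgtps/cmpgeps use LT/LE with the *_SWAP prototypes, and
   cmpngtps/cmpngeps likewise use UNGE/UNGT with swapped operands.  The
   SSE2 double-precision compares further down follow the same scheme.  */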
31114 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31115 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31116 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31117 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31118 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31119 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31120 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31121 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31122 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31123 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31124 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31125 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31126 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31127 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31128 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31129 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31130 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31131 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31132 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31133 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31135 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31136 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31137 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31138 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31140 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31141 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31142 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31143 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31145 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31147 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31148 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31149 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31150 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31151 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31153 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31154 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31155 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
31157 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31159 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31160 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31161 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31163 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31164 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
31166 /* SSE MMX or 3DNow!A */
31167 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31168 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31169 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31171 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31172 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31173 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31174 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31176 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31177 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31179 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31181 /* SSE2 */
31182 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31184 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31185 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31186 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31187 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31188 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31190 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31191 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31192 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31193 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31194 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31196 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31198 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31199 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31200 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31201 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31203 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31204 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31205 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31207 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31208 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31209 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31210 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31211 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31212 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31213 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31214 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31216 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31218 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31221 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31222 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31223 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31224 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31225 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31226 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31227 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31228 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31229 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31231 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31232 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31233 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31234 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31235 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31237 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31238 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31239 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31240 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31242 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31243 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31244 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31245 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31247 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31249 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31250 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31251 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31253 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31255 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31256 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31257 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31258 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31259 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31260 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31261 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31262 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31264 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31265 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31266 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31267 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31268 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31269 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31270 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31271 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31273 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31274 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31276 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31277 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31278 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31279 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31281 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31282 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31284 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31285 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31286 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31287 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31288 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31289 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31291 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31292 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31293 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31294 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31296 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31297 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31298 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31299 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31300 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31301 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31302 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31303 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31305 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31306 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31307 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31309 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31310 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31312 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31313 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31315 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31317 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31318 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
31322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31323 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31324 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31325 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31326 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31327 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31328 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31330 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31331 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31332 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31333 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31334 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31335 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31336 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31338 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31339 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31340 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31341 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31343 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31344 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31345 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31347 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31349 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31351 /* SSE2 MMX */
31352 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31353 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31355 /* SSE3 */
31356 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31357 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31359 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31360 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31361 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31362 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31363 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31364 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31366 /* SSSE3 */
31367 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31368 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31369 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31370 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31371 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31372 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31374 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31375 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31376 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31377 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31378 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31379 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31380 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31381 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31382 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31383 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31384 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31385 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31386 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31387 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31388 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31389 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31390 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31391 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31392 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31393 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31394 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31395 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31396 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31397 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31399 /* SSSE3. */
31400 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31401 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
31403 /* SSE4.1 */
31404 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31405 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31406 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31407 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31408 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31409 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31410 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31411 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31412 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31413 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31415 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31416 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31417 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31418 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31419 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31420 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31421 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31422 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31423 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31424 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31425 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31426 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31427 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31429 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31430 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31431 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31432 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31433 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31434 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31435 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31436 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31437 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31438 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31439 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31440 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31442 /* SSE4.1 */
31443 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31444 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31445 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31446 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31448 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31449 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31450 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31451 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31453 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31454 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31456 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31457 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31459 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31460 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31461 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31462 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31464 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31465 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31467 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31468 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
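/* In the floorpd/ceilpd/truncpd/rintpd and floorps/ceilps/truncps/rintps
   entries above, the comparison slot is not really an rtx code: the
   (enum rtx_code) cast smuggles in the ROUND_FLOOR/ROUND_CEIL/ROUND_TRUNC/
   ROUND_MXCSR immediate, which the builtin expander passes on as the
   rounding-control operand of the underlying roundpd/roundps pattern.
   Function types ending in _ROUND select that expansion path.  */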
31470 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31471 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31472 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
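/* For *_PTEST entries (the three ptest rows above, and the vtestpd/vtestps
   and ptest256 rows in the AVX block below), the comparison field picks
   which flag of the PTEST/VTESTP* result is returned: EQ reads ZF (testz),
   LTU reads CF (testc), and GTU requires both ZF and CF clear (testnzc).  */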
31474 /* SSE4.2 */
31475 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31476 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31477 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31478 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31479 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31481 /* SSE4A */
31482 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31483 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31484 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31485 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31487 /* AES */
31488 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31489 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31491 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31492 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31493 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31494 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31496 /* PCLMUL */
31497 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
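/* The AES and PCLMUL rows above carry a null name.  Rows without a name are
   presumably skipped when this table is walked to declare builtins, so the
   user-visible __builtin_ia32_aes*128 and __builtin_ia32_pclmulqdq128
   declarations would come from explicit def_builtin calls elsewhere in this
   file; these rows then serve only to drive RTL expansion.  */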
31499 /* AVX */
31500 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31501 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31502 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31503 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31504 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31505 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31506 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31507 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31508 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31509 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31510 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31511 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31512 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31513 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31514 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31515 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31516 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31517 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31518 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31519 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31520 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31521 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31522 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31523 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31524 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31525 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31527 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31528 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31529 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31530 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31532 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31533 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31534 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31535 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31536 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31537 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31538 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31539 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31540 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31541 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31542 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31543 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31544 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31545 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31546 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31547 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31548 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31549 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31550 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31551 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31552 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31553 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31554 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31555 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31556 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31557 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31558 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31559 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31560 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31561 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31562 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31563 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31564 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31565 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31567 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31568 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31569 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31571 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31572 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31573 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31574 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31575 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31577 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31579 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31580 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31582 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31583 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31584 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31585 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31587 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31588 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31590 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31591 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31593 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31594 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31595 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31596 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31598 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31599 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31601 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31602 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31604 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31605 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31606 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31607 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31609 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31610 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31611 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31612 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31613 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31614 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31616 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31617 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31618 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31619 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31620 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31621 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31622 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31623 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31624 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31625 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31626 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31627 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31628 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31629 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31630 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31632 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31633 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31635 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31636 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31638 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31640 /* AVX2 */
31641 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31642 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31643 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31644 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31645 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31646 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31647 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31648 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31649 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31650 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31651 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31652 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31653 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31654 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31655 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31656 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31657 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31658 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31659 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31660 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31661 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31662 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31663 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31664 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31665 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31666 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31667 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31668 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31669 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31670 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31671 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31672 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31673 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31674 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31675 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31676 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31677 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31678 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31679 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31680 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31681 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31682 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31683 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31684 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31685 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31686 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31687 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31688 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31689 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31690 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31691 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31692 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31693 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31694 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31695 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31696 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31697 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31698 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31699 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31700 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31701 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31702 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31703 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31704 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31705 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31706 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31707 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31708 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31709 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31710 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31711 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31712 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31713 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31714 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31715 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31716 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31717 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31718 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31719 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31720 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31721 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31722 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31723 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31724 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31725 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31726 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31727 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31728 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31729 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31730 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31731 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31732 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31733 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31734 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31735 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31736 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31737 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31738 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
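/* A rough note on the shift entries above: the _COUNT suffix marks the last
   operand as a shift count (an integer for the *i forms, an XMM register for
   the others), which the expander adjusts to whatever the named pattern
   expects; the _CONVERT suffix (pslldqi256, psrldqi256, and palignr256
   further up) marks entries whose arguments are declared as V4DI although
   the underlying pattern works on V2TI, so the operands are only
   reinterpreted, not converted.  */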
31739 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31740 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31741 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31742 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31743 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31744 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31745 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31746 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31747 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31748 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31749 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31750 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31751 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31752 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31753 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31754 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31755 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31756 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31757 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31758 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31759 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31760 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31761 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31762 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31763 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31764 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31765 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31766 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31767 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31768 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31769 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31770 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31771 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31772 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31773 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31774 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31775 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31776 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31777 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31778 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31779 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31780 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31781 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31782 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31783 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31784 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31785 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31786 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
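/* LZCNT */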
31788 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31790 /* BMI */
31791 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31792 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31793 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31795 /* TBM */
31796 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31797 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31799 /* F16C */
31800 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
31801 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
31802 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
31803 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
31805 /* BMI2 */
31806 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31807 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31808 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31809 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31810 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31811 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31813 /* AVX512F */
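/* In the AVX512F entries below, a function type ending in _QI or _HI takes
   a write-mask as its last argument (8-bit for 8-element, 16-bit for
   16-element vectors), and the vector argument just before the mask is the
   pass-through value merged into masked-off lanes; the _maskz insn variants
   zero those lanes instead.  */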
31814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
31815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
31816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
31817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
31818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
31819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
31820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
31828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
31830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
31831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
31838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
31840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
31845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
31846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
31847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
31848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
31849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
31850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
31851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
31852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31868 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
31869 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
31870 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
31871 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
31872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31880 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31881 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31913 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31980 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31981 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31982 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31983 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
31992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
31999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
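/* Each entry in this table ties an ISA mask, an insn pattern, the
   user-visible builtin name, its IX86_BUILTIN code and a prototype code
   together; the builtins are registered from the table and the same
   descriptor is consulted again when a call to one of them is expanded.
   A rough sketch of how a masked entry surfaces to users, assuming the
   usual avx512fintrin.h wrapper (the function name add_masked below is
   purely illustrative):

       __m512i
       add_masked (__m512i w, __mmask16 u, __m512i a, __m512i b)
       {
         return _mm512_mask_add_epi32 (w, u, a, b);
       }

   which is expected to reach __builtin_ia32_paddd512_mask and therefore
   the CODE_FOR_addv16si3_mask pattern recorded above.  */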
32011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
32012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
32013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
32014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32015 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
32017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
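/* Most rows leave the fifth field UNKNOWN; the two vec_pack_sfix entries
   just above instead carry ROUND_FLOOR / ROUND_CEIL in it, and the
   _ROUND-suffixed prototype code should make the expander pass that value
   through as the rounding immediate, so a single insn pattern serves both
   the floor and ceil builtins.  These are not meant to be called by hand:
   a loop such as the sketch below (illustrative only) is the kind of code
   the vectorizer may lower to them at -O3 -mavx512f:

       void
       to_int_floor (int *restrict dst, const double *restrict src, int n)
       {
         for (int i = 0; i < n; i++)
           dst[i] = (int) __builtin_floor (src[i]);
       }
   */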
32020 /* Mask arithmetic operations */
32021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
32024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
32028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
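/* The mask arithmetic builtins above operate on whole __mmask16 values
   rather than on vector operands; they are what the _mm512_kand,
   _mm512_kor, _mm512_knot, _mm512_kxor and the related kunpack/kortest
   wrappers in avx512fintrin.h expand to.  A minimal sketch, assuming
   those wrappers (combine_masks is an illustrative name only):

       __mmask16
       combine_masks (__mmask16 a, __mmask16 b)
       {
         return _mm512_kand (a, _mm512_knot (b));
       }

   which should end up going through __builtin_ia32_kandhi and
   __builtin_ia32_knothi, i.e. the andhi3 and one_cmplhi2 patterns.  */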
32032 /* SHA */
32033 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32034 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32035 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32036 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32037 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32038 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
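/* The SHA entries above back the _mm_sha1msg1_epu32, _mm_sha1msg2_epu32,
   _mm_sha1nexte_epu32, _mm_sha1rnds4_epu32 and _mm_sha256* intrinsics
   from shaintrin.h; the name field is 0 here, apparently because these
   rows are only consulted at expansion time, with the user-visible
   builtin names defined elsewhere in this file.  A minimal usage sketch,
   assuming those wrappers (the function name sha1_round4 is illustrative
   only):

       #include <immintrin.h>

       __m128i
       sha1_round4 (__m128i abcd, __m128i msg)
       {
         return _mm_sha1rnds4_epu32 (abcd, msg, 0);
       }

   Built with -msha, this should reach IX86_BUILTIN_SHA1RNDS4 and the
   sha1rnds4 pattern listed above.  */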
32041 /* AVX512VL. */
32042 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
32043 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
32044 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32045 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32046 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32047 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32048 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32049 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32051 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32052 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32053 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32054 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32055 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32056 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32057 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32060 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32061 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32063 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32064 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32065 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32066 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32067 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32068 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32069 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32070 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32071 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32072 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32073 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32074 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32075 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32076 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32077 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32078 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32079 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32080 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32081 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32082 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32083 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32084 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32085 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32086 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32087 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32088 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32089 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32090 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32091 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32092 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32093 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32094 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32095 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32096 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32097 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32098 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
32099 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32100 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32101 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32102 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32103 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32104 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32105 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32106 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32107 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32108 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32109 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32110 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32111 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32112 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32113 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32114 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32115 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32116 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32117 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32118 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32119 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32120 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32121 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32122 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32123 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32124 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32125 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32126 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32127 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32128 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32129 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32130 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32131 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32132 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
32133 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32134 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32135 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32136 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32137 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32138 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32139 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32140 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32141 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32142 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32143 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32144 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32145 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32146 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32147 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32148 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32149 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32150 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32151 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32152 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32153 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32154 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32155 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
32156 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32157 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32158 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32159 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32160 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32161 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32162 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32163 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32164 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32167 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32168 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32169 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32170 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32174 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32175 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32176 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32177 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32178 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32179 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32180 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32181 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32182 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32183 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32184 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32185 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
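  /* Masked 16-bit element variable permutes (vpermw, vpermt2w, vpermi2w).  */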
32186 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32187 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32188 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32189 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32190 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32191 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32192 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32193 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
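  /* Masked reciprocal (vrcp14), reciprocal square root (vrsqrt14) and square root builtins.  */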
32194 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32197 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32198 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32199 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32200 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32205 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
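  /* Masked integer add/subtract builtins, including saturating forms.  */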
32206 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32207 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32208 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32209 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32210 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32211 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32212 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32213 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32214 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32215 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32216 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32217 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32218 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32219 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32220 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32221 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32222 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32223 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32224 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32225 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32226 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32227 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32228 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32229 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32230 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32231 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32232 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32233 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32234 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32235 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32236 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32237 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32239 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32241 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
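  /* Masked integer down-conversion builtins (vpmov, vpmovs, vpmovus).  */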
32242 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32243 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32244 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32245 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32246 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32247 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32248 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32253 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32254 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32255 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32256 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32257 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32258 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32259 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32264 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32269 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32275 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32276 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32277 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
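  /* Masked vrange, vgetexp and vfixupimm builtins.  */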
32278 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32279 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32280 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32281 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32285 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32288 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32289 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32290 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32291 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32292 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32293 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32294 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32295 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32296 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32298 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32299 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32300 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32301 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32302 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32303 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32304 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32305 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32306 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32307 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32308 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32309 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32310 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32311 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32312 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32313 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32314 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32315 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
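  /* Masked shift and bitwise AND/ANDN/OR/XOR builtins.  */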
32316 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32317 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32318 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32319 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32320 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32321 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32322 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32323 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32324 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32325 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32326 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32339 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32342 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32343 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32346 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32347 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32348 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32349 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32352 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32353 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32354 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32355 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32358 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32359 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32360 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32361 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32362 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32363 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32364 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32365 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32366 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32370 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32371 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32372 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32373 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
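  /* Masked fused multiply-add family (vfmadd, vfmsub, vfnmadd, vfnmsub and the add/sub interleaved forms).  */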
32380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32401 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32404 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32405 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32406 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32407 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32417 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32418 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32420 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32424 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32425 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32426 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32427 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32428 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32429 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32430 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32431 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32432 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32433 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32438 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32439 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32440 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32441 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32442 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32449 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32450 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32451 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32452 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32453 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32454 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32455 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32456 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32457 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32458 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32459 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
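  /* Masked two-source variable permutes (vpermt2*, vpermi2*) for dword, qword and FP elements.  */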
32460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32465 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32466 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32467 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32468 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32469 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32470 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32471 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32472 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32473 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32474 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32475 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32479 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32480 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32481 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32482 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32483 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32484 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32485 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32486 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32487 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32488 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32489 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32494 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32495 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32496 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32497 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32498 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32499 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32500 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32503 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32504 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32505 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32509 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32510 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32511 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32512 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32513 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32515 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32516 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32517 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32518 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32519 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32520 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32521 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32522 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32523 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32525 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32526 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32528 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32532 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32533 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32534 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32535 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32536 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32537 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32538 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32539 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32540 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32541 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32542 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32543 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32544 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32545 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32546 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32547 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32548 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32549 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32550 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32551 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32552 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32553 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32554 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32555 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32556 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32557 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32558 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32562 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32563 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32564 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32565 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32566 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32567 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32570 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32571 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32572 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32573 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32574 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32575 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32576 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32577 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32578 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32579 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32580 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32581 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32582 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32583 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32584 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32586 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32587 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32588 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32589 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32590 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32591 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32592 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32593 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32595 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32596 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32597 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32598 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32599 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32603 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32604 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32606 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32607 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32608 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32609 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32610 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32611 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32612 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32613 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32614 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32616 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32617 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32618 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32619 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32620 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32621 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32622 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32623 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32624 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32625 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32626 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32627 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32628 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32629 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32630 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32631 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32632 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32633 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32634 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32635 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32636 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32637 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32638 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32639 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32640 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32641 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32642 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32643 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32644 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32645 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32646 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32647 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32648 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32649 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32650 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32651 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32652 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32653 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32654 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32655 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32656 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32657 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32658 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32659 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32660 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32661 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32662 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32663 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32664 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32665 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32666 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32667 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32668 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32669 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32670 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32671 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32672 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32673 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32674 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32675 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32676 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32677 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32678 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32679 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32680 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32681 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32682 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32683 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32684 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32685 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32686 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32687 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32688 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32689 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32690 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32691 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32692 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32693 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32694 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32695 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32696 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32697 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32698 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32699 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32700 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32701 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32702 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32703 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32704 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32705 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32706 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32707 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32708 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32709 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32710 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32711 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32712 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32713 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32714 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32716 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32717 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32718 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32719 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32720 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32721 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32722 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32723 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32724 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32725 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32726 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32727 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32728 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32729 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32730 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32731 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32732 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32733 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32734 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32735 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32736 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32737 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32738 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32739 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32740 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32741 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32742 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
32743 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
32744 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32745 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32746 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32747 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32748 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32749 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32750 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32751 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32752 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
32753 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
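  /* Each initializer in these tables fills a struct builtin_description:
     the ISA option mask the builtin requires, the CODE_FOR_* insn pattern
     used to expand it, the __builtin_ia32_* name exposed to the front end,
     the IX86_BUILTIN_* enumerator, an rtx comparison code (UNKNOWN when the
     pattern needs none), and the prototype given as an ix86_builtin_func_type
     cast to int.  */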
32755 /* AVX512DQ. */
32756 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32757 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32758 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32759 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32760 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
32761 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
32762 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
32763 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
32764 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
32765 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
32766 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32767 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32768 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32769 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32770 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32771 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32772 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32773 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32774 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32775 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32776 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32777 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
32778 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
32779 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
32780 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
32781 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
32782 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
32783 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
32784 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
32785 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
32786 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
32788 /* AVX512BW. */
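  /* Builtins whose names end in "_mask" take a merge (pass-through) vector
     and a write-mask as their trailing arguments; the mask's scalar mode
     tracks the element count (QI up to 8 elements, HI for 16, SI for 32,
     DI for 64), which is why the byte/word forms below use SI and DI masks.
     The "_maskz" variants zero the unselected elements instead of merging.  */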
32789 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
32790 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
32791 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32792 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32793 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32794 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32795 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
32796 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
32797 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32798 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32799 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
32800 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
32801 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
32802 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
32803 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
32804 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
32805 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32806 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32807 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32808 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32809 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32810 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32811 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32812 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32813 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32814 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32815 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32816 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32817 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32818 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32819 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32820 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32821 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32822 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32823 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32824 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32825 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32826 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32827 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32828 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32829 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32830 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32831 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32832 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32833 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32834 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32835 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32836 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32837 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32838 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32839 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32840 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32841 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32842 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32843 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32844 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32845 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
32846 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
32847 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32848 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32849 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32850 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32851 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32852 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32853 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32854 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32855 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32856 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32857 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32858 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32859 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
32860 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
32861 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
32862 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
32863 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32864 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32865 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32866 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32867 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32868 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32869 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32870 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32871 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32872 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32873 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32874 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32875 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32876 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32877 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32878 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32879 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32881 /* AVX512IFMA */
32882 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32883 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32884 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32885 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32886 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32887 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32888 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32889 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32890 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32891 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32892 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32893 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32895 /* AVX512VBMI */
32896 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32897 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32898 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32899 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32900 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32901 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32902 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32903 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32904 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32905 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32906 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32907 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32908 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32909 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32910 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32911 };
32913 /* Builtins with rounding support. */
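/* Editorial sketch, not part of the original table: every entry below pairs
   an insn pattern that accepts an embedded-rounding operand with a builtin
   whose final INT argument is the rounding-mode immediate.  For instance,
   __builtin_ia32_addpd512_mask has type V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT,
   so a wrapper intrinsic would plausibly invoke it as

     __m512d r = __builtin_ia32_addpd512_mask ((__v8df) a, (__v8df) b,
                                               (__v8df) src, (__mmask8) m,
                                               _MM_FROUND_TO_NEAREST_INT
                                               | _MM_FROUND_NO_EXC);

   The wrapper name (e.g. _mm512_mask_add_round_pd) and the exact argument
   casts are assumptions taken from the usual avx512fintrin.h conventions,
   not from this table.  */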
32914 static const struct builtin_description bdesc_round_args[] =
32915 {
32916 /* AVX512F */
32917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
32922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
32923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
32924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
32925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
32926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
32927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
32930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
32932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
32934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
32936 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
32937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
32938 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
32939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
32940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32945 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
32946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
32947 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
32948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32997 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
32998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
32999 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33001 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33003 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33005 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33007 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33009 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33011 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
33019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
33020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33037 /* AVX512ER */
33038 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33039 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33040 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33041 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33042 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33043 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33044 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33045 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33046 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33047 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33049 /* AVX512DQ. */
33050 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33051 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33052 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33053 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33054 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33055 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33056 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33057 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33058 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33059 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33060 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33061 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33062 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33063 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33064 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33065 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
33066 };
33068 /* Builtins for MPX. */
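/* Editorial note: the MPX descriptors below carry (enum insn_code) 0 rather
   than a named pattern, which suggests they are matched by builtin enum in
   the expander and lowered by hand instead of through a single insn.  The
   authoritative signatures are the *_FTYPE_* codes in the last column, e.g.
   BND_FTYPE_PCVOID_ULONG for __builtin_ia32_bndmk (build a bound from a
   pointer and a length).  This reading of insn_code 0 is an assumption, not
   something stated by the table itself.  */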
33069 static const struct builtin_description bdesc_mpx[] =
33070 {
33071 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33072 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33073 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33074 };
33076 /* Const builtins for MPX. */
33077 static const struct builtin_description bdesc_mpx_const[] =
33078 {
33079 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33080 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33081 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33082 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33083 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33084 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33085 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33086 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
33087 };
33089 /* FMA4 and XOP. */
33090 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33091 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33092 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33093 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33094 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
33095 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
33096 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
33097 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
33098 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
33099 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
33100 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
33101 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
33102 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
33103 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
33104 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
33105 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
33106 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
33107 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
33108 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
33109 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
33110 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
33111 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
33112 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
33113 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
33114 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
33115 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
33116 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
33117 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
33118 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
33119 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
33120 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
33121 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
33122 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
33123 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
33124 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
33125 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
33126 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
33127 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
33128 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
33129 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
33130 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
33131 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
33132 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
33133 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
33134 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
33135 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
33136 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
33137 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
33138 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
33139 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
33140 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
33141 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
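/* Editorial sketch: the MULTI_ARG_* names above are just aliases for the
   *_FTYPE_* codes, so an entry such as the FMA4 scalar-float one below pairs
   CODE_FOR_fma4i_vmfmadd_v4sf with MULTI_ARG_3_SF, i.e. a builtin taking
   three V4SF operands and returning V4SF:

     __v4sf r = __builtin_ia32_vfmaddss (a, b, c);   (a * b + c in element 0)

   The user-visible wrapper (e.g. _mm_macc_ss in fma4intrin.h) is assumed
   here for illustration; only the builtin name and its type code come from
   the table.  */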
33143 static const struct builtin_description bdesc_multi_arg[] =
33144 {
33145 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33146 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33147 UNKNOWN, (int)MULTI_ARG_3_SF },
33148 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33149 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33150 UNKNOWN, (int)MULTI_ARG_3_DF },
33152 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33153 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33154 UNKNOWN, (int)MULTI_ARG_3_SF },
33155 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33156 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33157 UNKNOWN, (int)MULTI_ARG_3_DF },
33159 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33160 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33161 UNKNOWN, (int)MULTI_ARG_3_SF },
33162 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33163 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33164 UNKNOWN, (int)MULTI_ARG_3_DF },
33165 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33166 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33167 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33168 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33169 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33170 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33172 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33173 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33174 UNKNOWN, (int)MULTI_ARG_3_SF },
33175 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33176 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33177 UNKNOWN, (int)MULTI_ARG_3_DF },
33178 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33179 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33180 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33181 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33182 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33183 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33185 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33186 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33187 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33188 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33189 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
33190 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33191 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33193 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33194 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33195 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33196 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33197 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33198 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33199 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33201 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33203 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33204 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33205 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33206 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33207 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33208 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33209 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33210 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33211 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33212 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33213 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33214 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33216 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33217 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33218 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33219 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33220 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33221 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33222 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33223 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33224 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33225 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33226 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33227 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33228 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33229 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33230 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33231 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33233 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33234 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33235 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33236 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33237 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33238 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33240 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33241 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33242 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33243 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33244 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33245 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33246 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33247 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33248 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33249 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33250 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33251 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33252 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33253 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33254 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33256 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33257 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33258 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33259 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33260 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33261 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33262 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33264 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33265 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33266 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33267 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33268 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33269 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33270 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33272 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33273 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33274 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33275 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33276 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33277 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33278 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33280 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33281 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33282 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33283 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33284 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33285 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33286 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33288 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33289 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33290 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33291 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33292 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33293 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33294 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33296 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33297 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33298 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33299 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33300 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33301 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33302 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33304 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33305 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33306 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33307 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33308 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33309 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33310 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33312 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33313 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33314 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33315 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33316 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33317 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33318 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33320 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33321 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33322 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33323 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33324 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33325 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33326 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33327 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33329 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33330 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33331 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33332 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33333 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33334 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33335 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33336 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33338 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33339 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33340 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33341 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
33345 /* TM vector builtins. */
33347 /* Reuse the existing x86-specific `struct builtin_description' because
33348    we're lazy.  Add casts to make them fit.  */
33349 static const struct builtin_description bdesc_tm[] =
33351 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33352 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33353 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33354 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33355 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33356 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33357 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33359 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33360 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33361 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33362 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33363 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33364 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33365 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33367 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33368 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33369 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33370 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33371 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33372 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33373 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33375 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33376 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33377 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
33380 /* TM callbacks. */
33382 /* Return the builtin decl needed to load a vector of TYPE. */
33384 static tree
33385 ix86_builtin_tm_load (tree type)
33387 if (TREE_CODE (type) == VECTOR_TYPE)
33389 switch (tree_to_uhwi (TYPE_SIZE (type)))
33391 case 64:
33392 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33393 case 128:
33394 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33395 case 256:
33396 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33399 return NULL_TREE;
33402 /* Return the builtin decl needed to store a vector of TYPE. */
33404 static tree
33405 ix86_builtin_tm_store (tree type)
33407 if (TREE_CODE (type) == VECTOR_TYPE)
33409 switch (tree_to_uhwi (TYPE_SIZE (type)))
33411 case 64:
33412 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33413 case 128:
33414 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33415 case 256:
33416 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33419 return NULL_TREE;
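/* For instance (as the switches above show): a 128-bit vector type such
   as V4SF yields BUILT_IN_TM_LOAD_M128 from ix86_builtin_tm_load and
   BUILT_IN_TM_STORE_M128 from ix86_builtin_tm_store; non-vector types
   and unsupported sizes yield NULL_TREE.  */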
33422 /* Initialize the transactional memory vector load/store builtins. */
33424 static void
33425 ix86_init_tm_builtins (void)
33427 enum ix86_builtin_func_type ftype;
33428 const struct builtin_description *d;
33429 size_t i;
33430 tree decl;
33431 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33432 tree attrs_log, attrs_type_log;
33434 if (!flag_tm)
33435 return;
33437 /* If there are no builtins defined, we must be compiling in a
33438 language without trans-mem support. */
33439 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33440 return;
33442 /* Use whatever attributes a normal TM load has. */
33443 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33444 attrs_load = DECL_ATTRIBUTES (decl);
33445 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33446 /* Use whatever attributes a normal TM store has. */
33447 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33448 attrs_store = DECL_ATTRIBUTES (decl);
33449 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33450 /* Use whatever attributes a normal TM log has. */
33451 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33452 attrs_log = DECL_ATTRIBUTES (decl);
33453 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33455 for (i = 0, d = bdesc_tm;
33456 i < ARRAY_SIZE (bdesc_tm);
33457 i++, d++)
33459 if ((d->mask & ix86_isa_flags) != 0
33460 || (lang_hooks.builtin_function
33461 == lang_hooks.builtin_function_ext_scope))
33463 tree type, attrs, attrs_type;
33464 enum built_in_function code = (enum built_in_function) d->code;
33466 ftype = (enum ix86_builtin_func_type) d->flag;
33467 type = ix86_get_builtin_func_type (ftype);
33469 if (BUILTIN_TM_LOAD_P (code))
33471 attrs = attrs_load;
33472 attrs_type = attrs_type_load;
33474 else if (BUILTIN_TM_STORE_P (code))
33476 attrs = attrs_store;
33477 attrs_type = attrs_type_store;
33479 else
33481 attrs = attrs_log;
33482 attrs_type = attrs_type_log;
33484 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33485 /* The builtin without the prefix for
33486 calling it directly. */
33487 d->name + strlen ("__builtin_"),
33488 attrs);
33489 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33490 set the TYPE_ATTRIBUTES. */
33491 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33493 set_builtin_decl (code, decl, false);
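/* As a concrete example of the loop above: the bdesc_tm entry named
   "__builtin__ITM_WM128" is registered under that builtin name, and the
   d->name + strlen ("__builtin_") argument also makes it directly
   callable as "_ITM_WM128".  */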
33498 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
33499    not in the current target ISA, so that the user can compile particular
33500    modules with target-specific options that differ from the command-line
33501    options.  */
33502 static void
33503 ix86_init_mmx_sse_builtins (void)
33505 const struct builtin_description * d;
33506 enum ix86_builtin_func_type ftype;
33507 size_t i;
33509 /* Add all special builtins with variable number of operands. */
33510 for (i = 0, d = bdesc_special_args;
33511 i < ARRAY_SIZE (bdesc_special_args);
33512 i++, d++)
33514 if (d->name == 0)
33515 continue;
33517 ftype = (enum ix86_builtin_func_type) d->flag;
33518 def_builtin (d->mask, d->name, ftype, d->code);
33521 /* Add all builtins with variable number of operands. */
33522 for (i = 0, d = bdesc_args;
33523 i < ARRAY_SIZE (bdesc_args);
33524 i++, d++)
33526 if (d->name == 0)
33527 continue;
33529 ftype = (enum ix86_builtin_func_type) d->flag;
33530 def_builtin_const (d->mask, d->name, ftype, d->code);
33533 /* Add all builtins with rounding. */
33534 for (i = 0, d = bdesc_round_args;
33535 i < ARRAY_SIZE (bdesc_round_args);
33536 i++, d++)
33538 if (d->name == 0)
33539 continue;
33541 ftype = (enum ix86_builtin_func_type) d->flag;
33542 def_builtin_const (d->mask, d->name, ftype, d->code);
33545 /* pcmpestr[im] insns. */
33546 for (i = 0, d = bdesc_pcmpestr;
33547 i < ARRAY_SIZE (bdesc_pcmpestr);
33548 i++, d++)
33550 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33551 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33552 else
33553 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33554 def_builtin_const (d->mask, d->name, ftype, d->code);
33557 /* pcmpistr[im] insns. */
33558 for (i = 0, d = bdesc_pcmpistr;
33559 i < ARRAY_SIZE (bdesc_pcmpistr);
33560 i++, d++)
33562 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33563 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33564 else
33565 ftype = INT_FTYPE_V16QI_V16QI_INT;
33566 def_builtin_const (d->mask, d->name, ftype, d->code);
33569 /* comi/ucomi insns. */
33570 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33572 if (d->mask == OPTION_MASK_ISA_SSE2)
33573 ftype = INT_FTYPE_V2DF_V2DF;
33574 else
33575 ftype = INT_FTYPE_V4SF_V4SF;
33576 def_builtin_const (d->mask, d->name, ftype, d->code);
33579 /* SSE */
33580 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33581 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33582 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33583 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33585 /* SSE or 3DNow!A */
33586 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33587 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33588 IX86_BUILTIN_MASKMOVQ);
33590 /* SSE2 */
33591 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33592 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33594 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33595 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33596 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33597 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33599 /* SSE3. */
33600 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33601 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33602 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33603 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33605 /* AES */
33606 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33607 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33608 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33609 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33610 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33611 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33612 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33613 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33614 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33615 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33616 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33617 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33619 /* PCLMUL */
33620 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33621 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33623 /* RDRND */
33624 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33625 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33626 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33627 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33628 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33629 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33630 IX86_BUILTIN_RDRAND64_STEP);
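/* Illustrative use of the RDRND step builtins defined above (a sketch,
   not taken from this file): each *_step builtin stores a random value
   through its pointer argument and returns an int status, e.g.

     unsigned int r;
     if (__builtin_ia32_rdrand32_step (&r))
       ... use r ...

   where the nonzero return is assumed to indicate that valid random
   data was delivered.  */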
33632 /* AVX2 */
33633 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33634 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33635 IX86_BUILTIN_GATHERSIV2DF);
33637 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33638 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33639 IX86_BUILTIN_GATHERSIV4DF);
33641 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33642 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33643 IX86_BUILTIN_GATHERDIV2DF);
33645 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33646 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33647 IX86_BUILTIN_GATHERDIV4DF);
33649 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33650 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33651 IX86_BUILTIN_GATHERSIV4SF);
33653 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33654 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33655 IX86_BUILTIN_GATHERSIV8SF);
33657 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33658 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33659 IX86_BUILTIN_GATHERDIV4SF);
33661 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33662 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33663 IX86_BUILTIN_GATHERDIV8SF);
33665 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33666 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33667 IX86_BUILTIN_GATHERSIV2DI);
33669 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33670 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33671 IX86_BUILTIN_GATHERSIV4DI);
33673 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33674 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33675 IX86_BUILTIN_GATHERDIV2DI);
33677 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33678 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33679 IX86_BUILTIN_GATHERDIV4DI);
33681 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33682 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33683 IX86_BUILTIN_GATHERSIV4SI);
33685 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33686 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33687 IX86_BUILTIN_GATHERSIV8SI);
33689 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33690 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33691 IX86_BUILTIN_GATHERDIV4SI);
33693 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33694 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33695 IX86_BUILTIN_GATHERDIV8SI);
33697 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33698 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33699 IX86_BUILTIN_GATHERALTSIV4DF);
33701 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33702 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33703 IX86_BUILTIN_GATHERALTDIV8SF);
33705 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33706 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33707 IX86_BUILTIN_GATHERALTSIV4DI);
33709 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33710 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33711 IX86_BUILTIN_GATHERALTDIV8SI);
33713 /* AVX512F */
33714 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33715 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33716 IX86_BUILTIN_GATHER3SIV16SF);
33718 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33719 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33720 IX86_BUILTIN_GATHER3SIV8DF);
33722 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33723 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33724 IX86_BUILTIN_GATHER3DIV16SF);
33726 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33727 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33728 IX86_BUILTIN_GATHER3DIV8DF);
33730 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33731 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33732 IX86_BUILTIN_GATHER3SIV16SI);
33734 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33735 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33736 IX86_BUILTIN_GATHER3SIV8DI);
33738 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33739 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33740 IX86_BUILTIN_GATHER3DIV16SI);
33742 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33743 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33744 IX86_BUILTIN_GATHER3DIV8DI);
33746 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
33747 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33748 IX86_BUILTIN_GATHER3ALTSIV8DF);
33750 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
33751 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33752 IX86_BUILTIN_GATHER3ALTDIV16SF);
33754 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
33755 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33756 IX86_BUILTIN_GATHER3ALTSIV8DI);
33758 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
33759 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33760 IX86_BUILTIN_GATHER3ALTDIV16SI);
33762 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33763 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33764 IX86_BUILTIN_SCATTERSIV16SF);
33766 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33767 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33768 IX86_BUILTIN_SCATTERSIV8DF);
33770 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33771 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33772 IX86_BUILTIN_SCATTERDIV16SF);
33774 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33775 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33776 IX86_BUILTIN_SCATTERDIV8DF);
33778 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33779 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33780 IX86_BUILTIN_SCATTERSIV16SI);
33782 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33783 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33784 IX86_BUILTIN_SCATTERSIV8DI);
33786 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
33787 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
33788 IX86_BUILTIN_SCATTERDIV16SI);
33790 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
33791 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
33792 IX86_BUILTIN_SCATTERDIV8DI);
33794 /* AVX512VL */
33795 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
33796 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
33797 IX86_BUILTIN_GATHER3SIV2DF);
33799 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
33800 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
33801 IX86_BUILTIN_GATHER3SIV4DF);
33803 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
33804 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
33805 IX86_BUILTIN_GATHER3DIV2DF);
33807 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
33808 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
33809 IX86_BUILTIN_GATHER3DIV4DF);
33811 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
33812 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
33813 IX86_BUILTIN_GATHER3SIV4SF);
33815 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
33816 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
33817 IX86_BUILTIN_GATHER3SIV8SF);
33819 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
33820 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
33821 IX86_BUILTIN_GATHER3DIV4SF);
33823 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
33824 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
33825 IX86_BUILTIN_GATHER3DIV8SF);
33827 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
33828 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
33829 IX86_BUILTIN_GATHER3SIV2DI);
33831 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
33832 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
33833 IX86_BUILTIN_GATHER3SIV4DI);
33835 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
33836 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
33837 IX86_BUILTIN_GATHER3DIV2DI);
33839 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
33840 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
33841 IX86_BUILTIN_GATHER3DIV4DI);
33843 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
33844 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
33845 IX86_BUILTIN_GATHER3SIV4SI);
33847 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
33848 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
33849 IX86_BUILTIN_GATHER3SIV8SI);
33851 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
33852 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
33853 IX86_BUILTIN_GATHER3DIV4SI);
33855 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
33856 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
33857 IX86_BUILTIN_GATHER3DIV8SI);
33859 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
33860 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
33861 IX86_BUILTIN_GATHER3ALTSIV4DF);
33863 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
33864 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
33865 IX86_BUILTIN_GATHER3ALTDIV8SF);
33867 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
33868 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
33869 IX86_BUILTIN_GATHER3ALTSIV4DI);
33871 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
33872 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
33873 IX86_BUILTIN_GATHER3ALTDIV8SI);
33875 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
33876 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
33877 IX86_BUILTIN_SCATTERSIV8SF);
33879 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
33880 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
33881 IX86_BUILTIN_SCATTERSIV4SF);
33883 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
33884 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
33885 IX86_BUILTIN_SCATTERSIV4DF);
33887 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
33888 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
33889 IX86_BUILTIN_SCATTERSIV2DF);
33891 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
33892 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
33893 IX86_BUILTIN_SCATTERDIV8SF);
33895 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
33896 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
33897 IX86_BUILTIN_SCATTERDIV4SF);
33899 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
33900 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
33901 IX86_BUILTIN_SCATTERDIV4DF);
33903 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
33904 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
33905 IX86_BUILTIN_SCATTERDIV2DF);
33907 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
33908 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
33909 IX86_BUILTIN_SCATTERSIV8SI);
33911 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
33912 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
33913 IX86_BUILTIN_SCATTERSIV4SI);
33915 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
33916 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
33917 IX86_BUILTIN_SCATTERSIV4DI);
33919 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
33920 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
33921 IX86_BUILTIN_SCATTERSIV2DI);
33923 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
33924 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
33925 IX86_BUILTIN_SCATTERDIV8SI);
33927 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
33928 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
33929 IX86_BUILTIN_SCATTERDIV4SI);
33931 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
33932 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
33933 IX86_BUILTIN_SCATTERDIV4DI);
33935 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
33936 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
33937 IX86_BUILTIN_SCATTERDIV2DI);
33939 /* AVX512PF */
33940 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
33941 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33942 IX86_BUILTIN_GATHERPFDPD);
33943 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
33944 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33945 IX86_BUILTIN_GATHERPFDPS);
33946 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
33947 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33948 IX86_BUILTIN_GATHERPFQPD);
33949 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
33950 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33951 IX86_BUILTIN_GATHERPFQPS);
33952 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
33953 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33954 IX86_BUILTIN_SCATTERPFDPD);
33955 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
33956 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33957 IX86_BUILTIN_SCATTERPFDPS);
33958 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
33959 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33960 IX86_BUILTIN_SCATTERPFQPD);
33961 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
33962 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33963 IX86_BUILTIN_SCATTERPFQPS);
33965 /* SHA */
33966 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
33967 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
33968 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
33969 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
33970 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
33971 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
33972 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
33973 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
33974 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
33975 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
33976 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
33977 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
33978 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
33979 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
33981 /* RTM. */
33982 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
33983 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
33985 /* MMX access to the vec_init patterns. */
33986 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
33987 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
33989 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
33990 V4HI_FTYPE_HI_HI_HI_HI,
33991 IX86_BUILTIN_VEC_INIT_V4HI);
33993 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
33994 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
33995 IX86_BUILTIN_VEC_INIT_V8QI);
33997 /* Access to the vec_extract patterns. */
33998 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
33999 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
34000 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
34001 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
34002 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
34003 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
34004 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
34005 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
34006 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
34007 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
34009 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34010 "__builtin_ia32_vec_ext_v4hi",
34011 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
34013 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
34014 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
34016 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
34017 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
34019 /* Access to the vec_set patterns. */
34020 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
34021 "__builtin_ia32_vec_set_v2di",
34022 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
34024 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
34025 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
34027 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
34028 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
34030 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
34031 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
34033 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34034 "__builtin_ia32_vec_set_v4hi",
34035 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
34037 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
34038 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
34040 /* RDSEED */
34041 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
34042 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
34043 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
34044 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
34045 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
34046 "__builtin_ia32_rdseed_di_step",
34047 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
34049 /* ADCX */
34050 def_builtin (0, "__builtin_ia32_addcarryx_u32",
34051 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34052 def_builtin (OPTION_MASK_ISA_64BIT,
34053 "__builtin_ia32_addcarryx_u64",
34054 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34055 IX86_BUILTIN_ADDCARRYX64);
34057 /* SBB */
34058 def_builtin (0, "__builtin_ia32_sbb_u32",
34059 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34060 def_builtin (OPTION_MASK_ISA_64BIT,
34061 "__builtin_ia32_sbb_u64",
34062 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34063 IX86_BUILTIN_SBB64);
34065 /* Read/write FLAGS. */
34066 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34067 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34068 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34069 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34070 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34071 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34072 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34073 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
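/* Illustrative use of the flags builtins defined above (a sketch): on a
   32-bit target,

     unsigned int flags = __builtin_ia32_readeflags_u32 ();
     __builtin_ia32_writeeflags_u32 (flags);

   reads and then restores the flags register, matching the
   UNSIGNED_FTYPE_VOID and VOID_FTYPE_UNSIGNED signatures.  */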
34075 /* CLFLUSHOPT. */
34076 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34077 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34079 /* CLWB. */
34080 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34081 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34083 /* Add FMA4 multi-arg instructions.  */
34084 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34086 if (d->name == 0)
34087 continue;
34089 ftype = (enum ix86_builtin_func_type) d->flag;
34090 def_builtin_const (d->mask, d->name, ftype, d->code);
34094 static void
34095 ix86_init_mpx_builtins ()
34097 const struct builtin_description * d;
34098 enum ix86_builtin_func_type ftype;
34099 tree decl;
34100 size_t i;
34102 for (i = 0, d = bdesc_mpx;
34103 i < ARRAY_SIZE (bdesc_mpx);
34104 i++, d++)
34106 if (d->name == 0)
34107 continue;
34109 ftype = (enum ix86_builtin_func_type) d->flag;
34110 decl = def_builtin (d->mask, d->name, ftype, d->code);
34112 /* Without the leaf and nothrow flags for MPX builtins,
34113    abnormal edges may follow their calls when setjmp
34114    is present in the function.  Since we may have many
34115    MPX builtin calls, this creates lots of useless
34116    edges and enormous PHI nodes.  To avoid this we mark
34117    MPX builtins as leaf and nothrow.  */
34118 if (decl)
34120 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34121 NULL_TREE);
34122 TREE_NOTHROW (decl) = 1;
34124 else
34126 ix86_builtins_isa[(int)d->code].leaf_p = true;
34127 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34131 for (i = 0, d = bdesc_mpx_const;
34132 i < ARRAY_SIZE (bdesc_mpx_const);
34133 i++, d++)
34135 if (d->name == 0)
34136 continue;
34138 ftype = (enum ix86_builtin_func_type) d->flag;
34139 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34141 if (decl)
34143 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34144 NULL_TREE);
34145 TREE_NOTHROW (decl) = 1;
34147 else
34149 ix86_builtins_isa[(int)d->code].leaf_p = true;
34150 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34155 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34156 to return a pointer to VERSION_DECL if the outcome of the expression
34157 formed by PREDICATE_CHAIN is true. This function will be called during
34158 version dispatch to decide which function version to execute. It returns
34159 the basic block at the end, to which more conditions can be added. */
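/* Roughly, for a predicate chain whose entries call a CPU-feature
   predicate such as the IX86_BUILTIN_CPU_SUPPORTS builtin with "avx2"
   (the feature name is only illustrative), the statements built below
   have the shape

     cond_var = predicate ("avx2");
     if (cond_var > 0)
       return (void *) &version_decl;

   with several predicate results combined through MIN_EXPR before the
   comparison.  This is a sketch of the intent, not the exact GIMPLE.  */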
34161 static basic_block
34162 add_condition_to_bb (tree function_decl, tree version_decl,
34163 tree predicate_chain, basic_block new_bb)
34165 gimple return_stmt;
34166 tree convert_expr, result_var;
34167 gimple convert_stmt;
34168 gimple call_cond_stmt;
34169 gimple if_else_stmt;
34171 basic_block bb1, bb2, bb3;
34172 edge e12, e23;
34174 tree cond_var, and_expr_var = NULL_TREE;
34175 gimple_seq gseq;
34177 tree predicate_decl, predicate_arg;
34179 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34181 gcc_assert (new_bb != NULL);
34182 gseq = bb_seq (new_bb);
34185 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34186 build_fold_addr_expr (version_decl));
34187 result_var = create_tmp_var (ptr_type_node);
34188 convert_stmt = gimple_build_assign (result_var, convert_expr);
34189 return_stmt = gimple_build_return (result_var);
34191 if (predicate_chain == NULL_TREE)
34193 gimple_seq_add_stmt (&gseq, convert_stmt);
34194 gimple_seq_add_stmt (&gseq, return_stmt);
34195 set_bb_seq (new_bb, gseq);
34196 gimple_set_bb (convert_stmt, new_bb);
34197 gimple_set_bb (return_stmt, new_bb);
34198 pop_cfun ();
34199 return new_bb;
34202 while (predicate_chain != NULL)
34204 cond_var = create_tmp_var (integer_type_node);
34205 predicate_decl = TREE_PURPOSE (predicate_chain);
34206 predicate_arg = TREE_VALUE (predicate_chain);
34207 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34208 gimple_call_set_lhs (call_cond_stmt, cond_var);
34210 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34211 gimple_set_bb (call_cond_stmt, new_bb);
34212 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34214 predicate_chain = TREE_CHAIN (predicate_chain);
34216 if (and_expr_var == NULL)
34217 and_expr_var = cond_var;
34218 else
34220 gimple assign_stmt;
34221 /* Use MIN_EXPR to check whether any integer is zero:
34222    and_expr_var = min_expr <cond_var, and_expr_var>.  */
34223 assign_stmt = gimple_build_assign (and_expr_var,
34224 build2 (MIN_EXPR, integer_type_node,
34225 cond_var, and_expr_var));
34227 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34228 gimple_set_bb (assign_stmt, new_bb);
34229 gimple_seq_add_stmt (&gseq, assign_stmt);
34233 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34234 integer_zero_node,
34235 NULL_TREE, NULL_TREE);
34236 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34237 gimple_set_bb (if_else_stmt, new_bb);
34238 gimple_seq_add_stmt (&gseq, if_else_stmt);
34240 gimple_seq_add_stmt (&gseq, convert_stmt);
34241 gimple_seq_add_stmt (&gseq, return_stmt);
34242 set_bb_seq (new_bb, gseq);
34244 bb1 = new_bb;
34245 e12 = split_block (bb1, if_else_stmt);
34246 bb2 = e12->dest;
34247 e12->flags &= ~EDGE_FALLTHRU;
34248 e12->flags |= EDGE_TRUE_VALUE;
34250 e23 = split_block (bb2, return_stmt);
34252 gimple_set_bb (convert_stmt, bb2);
34253 gimple_set_bb (return_stmt, bb2);
34255 bb3 = e23->dest;
34256 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34258 remove_edge (e23);
34259 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34261 pop_cfun ();
34263 return bb3;
34266 /* This parses the attribute arguments to target in DECL and determines
34267 the right builtin to use to match the platform specification.
34268 It returns the priority value for this version decl. If PREDICATE_LIST
34269 is not NULL, it stores the list of cpu features that need to be checked
34270 before dispatching this function. */
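/* For example (the attribute string is purely illustrative): for a
   version declared with __attribute__ ((target ("arch=core2,ssse3"))),
   the code below builds a predicate list containing the
   IX86_BUILTIN_CPU_IS predicate with "core2" and the
   IX86_BUILTIN_CPU_SUPPORTS predicate with "ssse3", and returns
   P_PROC_SSSE3 as the priority, since the arch priority exceeds
   P_SSSE3.  */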
34272 static unsigned int
34273 get_builtin_code_for_version (tree decl, tree *predicate_list)
34275 tree attrs;
34276 struct cl_target_option cur_target;
34277 tree target_node;
34278 struct cl_target_option *new_target;
34279 const char *arg_str = NULL;
34280 const char *attrs_str = NULL;
34281 char *tok_str = NULL;
34282 char *token;
34284 /* Priority of i386 features, greater value is higher priority. This is
34285 used to decide the order in which function dispatch must happen. For
34286 instance, a version specialized for SSE4.2 should be checked for dispatch
34287 before a version for SSE3, as SSE4.2 implies SSE3. */
34288 enum feature_priority
34290 P_ZERO = 0,
34291 P_MMX,
34292 P_SSE,
34293 P_SSE2,
34294 P_SSE3,
34295 P_SSSE3,
34296 P_PROC_SSSE3,
34297 P_SSE4_A,
34298 P_PROC_SSE4_A,
34299 P_SSE4_1,
34300 P_SSE4_2,
34301 P_PROC_SSE4_2,
34302 P_POPCNT,
34303 P_AVX,
34304 P_PROC_AVX,
34305 P_BMI,
34306 P_PROC_BMI,
34307 P_FMA4,
34308 P_XOP,
34309 P_PROC_XOP,
34310 P_FMA,
34311 P_PROC_FMA,
34312 P_BMI2,
34313 P_AVX2,
34314 P_PROC_AVX2,
34315 P_AVX512F,
34316 P_PROC_AVX512F
34319 enum feature_priority priority = P_ZERO;
34321 /* These are the target attribute strings for which a dispatcher is
34322 available, from fold_builtin_cpu. */
34324 static struct _feature_list
34326 const char *const name;
34327 const enum feature_priority priority;
34329 const feature_list[] =
34331 {"mmx", P_MMX},
34332 {"sse", P_SSE},
34333 {"sse2", P_SSE2},
34334 {"sse3", P_SSE3},
34335 {"sse4a", P_SSE4_A},
34336 {"ssse3", P_SSSE3},
34337 {"sse4.1", P_SSE4_1},
34338 {"sse4.2", P_SSE4_2},
34339 {"popcnt", P_POPCNT},
34340 {"avx", P_AVX},
34341 {"bmi", P_BMI},
34342 {"fma4", P_FMA4},
34343 {"xop", P_XOP},
34344 {"fma", P_FMA},
34345 {"bmi2", P_BMI2},
34346 {"avx2", P_AVX2},
34347 {"avx512f", P_AVX512F}
34351 static unsigned int NUM_FEATURES
34352 = sizeof (feature_list) / sizeof (struct _feature_list);
34354 unsigned int i;
34356 tree predicate_chain = NULL_TREE;
34357 tree predicate_decl, predicate_arg;
34359 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34360 gcc_assert (attrs != NULL);
34362 attrs = TREE_VALUE (TREE_VALUE (attrs));
34364 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34365 attrs_str = TREE_STRING_POINTER (attrs);
34367 /* Return priority zero for default function. */
34368 if (strcmp (attrs_str, "default") == 0)
34369 return 0;
34371 /* Handle arch= if specified. For priority, set it to be 1 more than
34372 the best instruction set the processor can handle. For instance, if
34373 there is a version for atom and a version for ssse3 (the highest ISA
34374 priority for atom), the atom version must be checked for dispatch
34375 before the ssse3 version. */
34376 if (strstr (attrs_str, "arch=") != NULL)
34378 cl_target_option_save (&cur_target, &global_options);
34379 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34380 &global_options_set);
34382 gcc_assert (target_node);
34383 new_target = TREE_TARGET_OPTION (target_node);
34384 gcc_assert (new_target);
34386 if (new_target->arch_specified && new_target->arch > 0)
34388 switch (new_target->arch)
34390 case PROCESSOR_CORE2:
34391 arg_str = "core2";
34392 priority = P_PROC_SSSE3;
34393 break;
34394 case PROCESSOR_NEHALEM:
34395 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34396 arg_str = "westmere";
34397 else
34398 /* We translate "arch=corei7" and "arch=nehalem" to
34399 "corei7" so that it will be mapped to M_INTEL_COREI7
34400 as cpu type to cover all M_INTEL_COREI7_XXXs. */
34401 arg_str = "corei7";
34402 priority = P_PROC_SSE4_2;
34403 break;
34404 case PROCESSOR_SANDYBRIDGE:
34405 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34406 arg_str = "ivybridge";
34407 else
34408 arg_str = "sandybridge";
34409 priority = P_PROC_AVX;
34410 break;
34411 case PROCESSOR_HASWELL:
34412 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34413 arg_str = "broadwell";
34414 else
34415 arg_str = "haswell";
34416 priority = P_PROC_AVX2;
34417 break;
34418 case PROCESSOR_BONNELL:
34419 arg_str = "bonnell";
34420 priority = P_PROC_SSSE3;
34421 break;
34422 case PROCESSOR_KNL:
34423 arg_str = "knl";
34424 priority = P_PROC_AVX512F;
34425 break;
34426 case PROCESSOR_SILVERMONT:
34427 arg_str = "silvermont";
34428 priority = P_PROC_SSE4_2;
34429 break;
34430 case PROCESSOR_AMDFAM10:
34431 arg_str = "amdfam10h";
34432 priority = P_PROC_SSE4_A;
34433 break;
34434 case PROCESSOR_BTVER1:
34435 arg_str = "btver1";
34436 priority = P_PROC_SSE4_A;
34437 break;
34438 case PROCESSOR_BTVER2:
34439 arg_str = "btver2";
34440 priority = P_PROC_BMI;
34441 break;
34442 case PROCESSOR_BDVER1:
34443 arg_str = "bdver1";
34444 priority = P_PROC_XOP;
34445 break;
34446 case PROCESSOR_BDVER2:
34447 arg_str = "bdver2";
34448 priority = P_PROC_FMA;
34449 break;
34450 case PROCESSOR_BDVER3:
34451 arg_str = "bdver3";
34452 priority = P_PROC_FMA;
34453 break;
34454 case PROCESSOR_BDVER4:
34455 arg_str = "bdver4";
34456 priority = P_PROC_AVX2;
34457 break;
34461 cl_target_option_restore (&global_options, &cur_target);
34463 if (predicate_list && arg_str == NULL)
34465 error_at (DECL_SOURCE_LOCATION (decl),
34466 "No dispatcher found for the versioning attributes");
34467 return 0;
34470 if (predicate_list)
34472 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34473 /* For a C string literal the length includes the trailing NULL. */
34474 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34475 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34476 predicate_chain);
34480 /* Process feature name. */
34481 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34482 strcpy (tok_str, attrs_str);
34483 token = strtok (tok_str, ",");
34484 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34486 while (token != NULL)
34488 /* Do not process "arch=" */
34489 if (strncmp (token, "arch=", 5) == 0)
34491 token = strtok (NULL, ",");
34492 continue;
34494 for (i = 0; i < NUM_FEATURES; ++i)
34496 if (strcmp (token, feature_list[i].name) == 0)
34498 if (predicate_list)
34500 predicate_arg = build_string_literal (
34501 strlen (feature_list[i].name) + 1,
34502 feature_list[i].name);
34503 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34504 predicate_chain);
34506 /* Find the maximum priority feature. */
34507 if (feature_list[i].priority > priority)
34508 priority = feature_list[i].priority;
34510 break;
34513 if (predicate_list && i == NUM_FEATURES)
34515 error_at (DECL_SOURCE_LOCATION (decl),
34516 "No dispatcher found for %s", token);
34517 return 0;
34519 token = strtok (NULL, ",");
34521 free (tok_str);
34523 if (predicate_list && predicate_chain == NULL_TREE)
34525 error_at (DECL_SOURCE_LOCATION (decl),
34526 "No dispatcher found for the versioning attributes : %s",
34527 attrs_str);
34528 return 0;
34530 else if (predicate_list)
34532 predicate_chain = nreverse (predicate_chain);
34533 *predicate_list = predicate_chain;
34536 return priority;
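/* For illustration only (a sketch, not part of the original source), the
   kind of declarations this parsing handles and what it produces:

     __attribute__ ((target ("default")))     int foo (void);
     __attribute__ ((target ("arch=core2")))  int foo (void);
     __attribute__ ((target ("avx2")))        int foo (void);

   The "arch=core2" version gets priority P_PROC_SSSE3 and the predicate
   __builtin_cpu_is ("core2"); the "avx2" version gets the priority of
   "avx2" in feature_list and the predicate __builtin_cpu_supports ("avx2").  */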
34539 /* This compares the priority of target features in function DECL1
34540 and DECL2. It returns positive value if DECL1 is higher priority,
34541 negative value if DECL2 is higher priority and 0 if they are the
34542 same. */
34544 static int
34545 ix86_compare_version_priority (tree decl1, tree decl2)
34547 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34548 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34550 return (int)priority1 - (int)priority2;
34553 /* V1 and V2 point to function versions with different priorities
34554 based on the target ISA. This function compares their priorities. */
34556 static int
34557 feature_compare (const void *v1, const void *v2)
34559 typedef struct _function_version_info
34561 tree version_decl;
34562 tree predicate_chain;
34563 unsigned int dispatch_priority;
34564 } function_version_info;
34566 const function_version_info c1 = *(const function_version_info *)v1;
34567 const function_version_info c2 = *(const function_version_info *)v2;
34568 return (c2.dispatch_priority - c1.dispatch_priority);
34571 /* This function generates the dispatch function for
34572 multi-versioned functions. DISPATCH_DECL is the function which will
34573 contain the dispatch logic. FNDECLS are the function choices for
34574 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34575 in DISPATCH_DECL in which the dispatch code is generated. */
34577 static int
34578 dispatch_function_versions (tree dispatch_decl,
34579 void *fndecls_p,
34580 basic_block *empty_bb)
34582 tree default_decl;
34583 gimple ifunc_cpu_init_stmt;
34584 gimple_seq gseq;
34585 int ix;
34586 tree ele;
34587 vec<tree> *fndecls;
34588 unsigned int num_versions = 0;
34589 unsigned int actual_versions = 0;
34590 unsigned int i;
34592 struct _function_version_info
34594 tree version_decl;
34595 tree predicate_chain;
34596 unsigned int dispatch_priority;
34597 }*function_version_info;
34599 gcc_assert (dispatch_decl != NULL
34600 && fndecls_p != NULL
34601 && empty_bb != NULL);
34603 /* fndecls_p is actually a vector. */
34604 fndecls = static_cast<vec<tree> *> (fndecls_p);
34606 /* At least one more version other than the default. */
34607 num_versions = fndecls->length ();
34608 gcc_assert (num_versions >= 2);
34610 function_version_info = (struct _function_version_info *)
34611 XNEWVEC (struct _function_version_info, (num_versions - 1));
34613 /* The first version in the vector is the default decl. */
34614 default_decl = (*fndecls)[0];
34616 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34618 gseq = bb_seq (*empty_bb);
34619 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34620 constructors, so explicitly call __builtin_cpu_init here. */
34621 ifunc_cpu_init_stmt = gimple_build_call_vec (
34622 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34623 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34624 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34625 set_bb_seq (*empty_bb, gseq);
34627 pop_cfun ();
34630 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34632 tree version_decl = ele;
34633 tree predicate_chain = NULL_TREE;
34634 unsigned int priority;
34635 /* Get attribute string, parse it and find the right predicate decl.
34636 The predicate function could be a lengthy combination of many
34637 features, like arch-type and various isa-variants. */
34638 priority = get_builtin_code_for_version (version_decl,
34639 &predicate_chain);
34641 if (predicate_chain == NULL_TREE)
34642 continue;
34644 function_version_info [actual_versions].version_decl = version_decl;
34645 function_version_info [actual_versions].predicate_chain
34646 = predicate_chain;
34647 function_version_info [actual_versions].dispatch_priority = priority;
34648 actual_versions++;
34651 /* Sort the versions according to descending order of dispatch priority. The
34652 priority is based on the ISA. This is not a perfect solution. There
34653 could still be ambiguity. If more than one function version is suitable
34654 to execute, which one should be dispatched? In the future, allow the user
34655 to specify a dispatch priority next to the version. */
34656 qsort (function_version_info, actual_versions,
34657 sizeof (struct _function_version_info), feature_compare);
34659 for (i = 0; i < actual_versions; ++i)
34660 *empty_bb = add_condition_to_bb (dispatch_decl,
34661 function_version_info[i].version_decl,
34662 function_version_info[i].predicate_chain,
34663 *empty_bb);
34665 /* Dispatch the default version at the end. */
34666 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34667 NULL, *empty_bb);
34669 free (function_version_info);
34670 return 0;
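/* Conceptually, the dispatch code built above behaves like the following
   pseudo-C (a sketch only; the actual code is emitted as GIMPLE into the
   resolver body):

     __builtin_cpu_init ();
     if (<predicate of the highest-priority version>)
       return <that version>;
     else if (<next predicate>)
       return <next version>;
     else
       return <default version>;  */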
34673 /* Comparator function to be used in qsort routine to sort attribute
34674 specification strings to "target". */
34676 static int
34677 attr_strcmp (const void *v1, const void *v2)
34679 const char *c1 = *(char *const*)v1;
34680 const char *c2 = *(char *const*)v2;
34681 return strcmp (c1, c2);
34684 /* ARGLIST is the argument to target attribute. This function tokenizes
34685 the comma separated arguments, sorts them and returns a string which
34686 is a unique identifier for the comma separated arguments. It also
34687 replaces non-identifier characters "=,-" with "_". */
34689 static char *
34690 sorted_attr_string (tree arglist)
34692 tree arg;
34693 size_t str_len_sum = 0;
34694 char **args = NULL;
34695 char *attr_str, *ret_str;
34696 char *attr = NULL;
34697 unsigned int argnum = 1;
34698 unsigned int i;
34700 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34702 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34703 size_t len = strlen (str);
34704 str_len_sum += len + 1;
34705 if (arg != arglist)
34706 argnum++;
34707 for (i = 0; i < strlen (str); i++)
34708 if (str[i] == ',')
34709 argnum++;
34712 attr_str = XNEWVEC (char, str_len_sum);
34713 str_len_sum = 0;
34714 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34716 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34717 size_t len = strlen (str);
34718 memcpy (attr_str + str_len_sum, str, len);
34719 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34720 str_len_sum += len + 1;
34723 /* Replace "=,-" with "_". */
34724 for (i = 0; i < strlen (attr_str); i++)
34725 if (attr_str[i] == '=' || attr_str[i]== '-')
34726 attr_str[i] = '_';
34728 if (argnum == 1)
34729 return attr_str;
34731 args = XNEWVEC (char *, argnum);
34733 i = 0;
34734 attr = strtok (attr_str, ",");
34735 while (attr != NULL)
34737 args[i] = attr;
34738 i++;
34739 attr = strtok (NULL, ",");
34742 qsort (args, argnum, sizeof (char *), attr_strcmp);
34744 ret_str = XNEWVEC (char, str_len_sum);
34745 str_len_sum = 0;
34746 for (i = 0; i < argnum; i++)
34748 size_t len = strlen (args[i]);
34749 memcpy (ret_str + str_len_sum, args[i], len);
34750 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34751 str_len_sum += len + 1;
34754 XDELETEVEC (args);
34755 XDELETEVEC (attr_str);
34756 return ret_str;
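/* Example of the transformation above (a sketch): the attribute arguments
   "sse4.2,arch=atom" are rewritten to the tokens "sse4.2" and "arch_atom",
   which sort to the single identifier "arch_atom_sse4.2".  */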
34759 /* This function changes the assembler name for functions that are
34760 versions. If DECL is a function version and has a "target"
34761 attribute, it appends the attribute string to its assembler name. */
34763 static tree
34764 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34766 tree version_attr;
34767 const char *orig_name, *version_string;
34768 char *attr_str, *assembler_name;
34770 if (DECL_DECLARED_INLINE_P (decl)
34771 && lookup_attribute ("gnu_inline",
34772 DECL_ATTRIBUTES (decl)))
34773 error_at (DECL_SOURCE_LOCATION (decl),
34774 "Function versions cannot be marked as gnu_inline,"
34775 " bodies have to be generated");
34777 if (DECL_VIRTUAL_P (decl)
34778 || DECL_VINDEX (decl))
34779 sorry ("Virtual function multiversioning not supported");
34781 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34783 /* target attribute string cannot be NULL. */
34784 gcc_assert (version_attr != NULL_TREE);
34786 orig_name = IDENTIFIER_POINTER (id);
34787 version_string
34788 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
34790 if (strcmp (version_string, "default") == 0)
34791 return id;
34793 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
34794 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
34796 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
34798 /* Allow assembler name to be modified if already set. */
34799 if (DECL_ASSEMBLER_NAME_SET_P (decl))
34800 SET_DECL_RTL (decl, NULL);
34802 tree ret = get_identifier (assembler_name);
34803 XDELETEVEC (attr_str);
34804 XDELETEVEC (assembler_name);
34805 return ret;
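/* For example (a sketch): a version of foo declared with
   __attribute__ ((target ("avx2"))) whose assembler name was "_Z3foov"
   is renamed to "_Z3foov.avx2", while the "default" version keeps its
   original assembler name.  */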
34808 /* This function returns true if FN1 and FN2 are versions of the same function,
34809 that is, the target strings of the function decls are different. This assumes
34810 that FN1 and FN2 have the same signature. */
34812 static bool
34813 ix86_function_versions (tree fn1, tree fn2)
34815 tree attr1, attr2;
34816 char *target1, *target2;
34817 bool result;
34819 if (TREE_CODE (fn1) != FUNCTION_DECL
34820 || TREE_CODE (fn2) != FUNCTION_DECL)
34821 return false;
34823 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
34824 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
34826 /* At least one function decl should have the target attribute specified. */
34827 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
34828 return false;
34830 /* Diagnose missing target attribute if one of the decls is already
34831 multi-versioned. */
34832 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
34834 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
34836 if (attr2 != NULL_TREE)
34838 tree tem = fn1;
34839 fn1 = fn2;
34840 fn2 = tem;
34841 attr1 = attr2;
34843 error_at (DECL_SOURCE_LOCATION (fn2),
34844 "missing %<target%> attribute for multi-versioned %D",
34845 fn2);
34846 inform (DECL_SOURCE_LOCATION (fn1),
34847 "previous declaration of %D", fn1);
34848 /* Prevent diagnosing of the same error multiple times. */
34849 DECL_ATTRIBUTES (fn2)
34850 = tree_cons (get_identifier ("target"),
34851 copy_node (TREE_VALUE (attr1)),
34852 DECL_ATTRIBUTES (fn2));
34854 return false;
34857 target1 = sorted_attr_string (TREE_VALUE (attr1));
34858 target2 = sorted_attr_string (TREE_VALUE (attr2));
34860 /* The sorted target strings must be different for fn1 and fn2
34861 to be versions. */
34862 if (strcmp (target1, target2) == 0)
34863 result = false;
34864 else
34865 result = true;
34867 XDELETEVEC (target1);
34868 XDELETEVEC (target2);
34870 return result;
34873 static tree
34874 ix86_mangle_decl_assembler_name (tree decl, tree id)
34876 /* For function version, add the target suffix to the assembler name. */
34877 if (TREE_CODE (decl) == FUNCTION_DECL
34878 && DECL_FUNCTION_VERSIONED (decl))
34879 id = ix86_mangle_function_version_assembler_name (decl, id);
34880 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
34881 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
34882 #endif
34884 return id;
34887 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
34888 is true, append the full path name of the source file. */
34890 static char *
34891 make_name (tree decl, const char *suffix, bool make_unique)
34893 char *global_var_name;
34894 int name_len;
34895 const char *name;
34896 const char *unique_name = NULL;
34898 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
34900 /* Get a unique name that can be used globally without any chances
34901 of collision at link time. */
34902 if (make_unique)
34903 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
34905 name_len = strlen (name) + strlen (suffix) + 2;
34907 if (make_unique)
34908 name_len += strlen (unique_name) + 1;
34909 global_var_name = XNEWVEC (char, name_len);
34911 /* Use '.' to concatenate names as it is demangler friendly. */
34912 if (make_unique)
34913 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
34914 suffix);
34915 else
34916 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
34918 return global_var_name;
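/* Example results (a sketch): make_name (decl, "resolver", false) yields
   "foo.resolver" for a decl whose assembler name is "foo", while
   make_name (decl, "ifunc", true) yields "foo.<file-based-unique-name>.ifunc"
   when the decl is not externally visible.  */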
34921 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34923 /* Make a dispatcher declaration for the multi-versioned function DECL.
34924 Calls to the function DECL will be replaced with calls to the dispatcher
34925 by the front-end. Return the decl created. */
34927 static tree
34928 make_dispatcher_decl (const tree decl)
34930 tree func_decl;
34931 char *func_name;
34932 tree fn_type, func_type;
34933 bool is_uniq = false;
34935 if (TREE_PUBLIC (decl) == 0)
34936 is_uniq = true;
34938 func_name = make_name (decl, "ifunc", is_uniq);
34940 fn_type = TREE_TYPE (decl);
34941 func_type = build_function_type (TREE_TYPE (fn_type),
34942 TYPE_ARG_TYPES (fn_type));
34944 func_decl = build_fn_decl (func_name, func_type);
34945 XDELETEVEC (func_name);
34946 TREE_USED (func_decl) = 1;
34947 DECL_CONTEXT (func_decl) = NULL_TREE;
34948 DECL_INITIAL (func_decl) = error_mark_node;
34949 DECL_ARTIFICIAL (func_decl) = 1;
34950 /* Mark this func as external, the resolver will flip it again if
34951 it gets generated. */
34952 DECL_EXTERNAL (func_decl) = 1;
34953 /* This will be an IFUNC; IFUNCs have to be externally visible. */
34954 TREE_PUBLIC (func_decl) = 1;
34956 return func_decl;
34959 #endif
34961 /* Returns true if DECL is multi-versioned and is the default function,
34962 that is, it is not tagged with a target-specific optimization. */
34964 static bool
34965 is_function_default_version (const tree decl)
34967 if (TREE_CODE (decl) != FUNCTION_DECL
34968 || !DECL_FUNCTION_VERSIONED (decl))
34969 return false;
34970 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34971 gcc_assert (attr);
34972 attr = TREE_VALUE (TREE_VALUE (attr));
34973 return (TREE_CODE (attr) == STRING_CST
34974 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
34977 /* Make a dispatcher declaration for the multi-versioned function DECL.
34978 Calls to the function DECL will be replaced with calls to the dispatcher
34979 by the front-end. Returns the decl of the dispatcher function. */
34981 static tree
34982 ix86_get_function_versions_dispatcher (void *decl)
34984 tree fn = (tree) decl;
34985 struct cgraph_node *node = NULL;
34986 struct cgraph_node *default_node = NULL;
34987 struct cgraph_function_version_info *node_v = NULL;
34988 struct cgraph_function_version_info *first_v = NULL;
34990 tree dispatch_decl = NULL;
34992 struct cgraph_function_version_info *default_version_info = NULL;
34994 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
34996 node = cgraph_node::get (fn);
34997 gcc_assert (node != NULL);
34999 node_v = node->function_version ();
35000 gcc_assert (node_v != NULL);
35002 if (node_v->dispatcher_resolver != NULL)
35003 return node_v->dispatcher_resolver;
35005 /* Find the default version and make it the first node. */
35006 first_v = node_v;
35007 /* Go to the beginning of the chain. */
35008 while (first_v->prev != NULL)
35009 first_v = first_v->prev;
35010 default_version_info = first_v;
35011 while (default_version_info != NULL)
35013 if (is_function_default_version
35014 (default_version_info->this_node->decl))
35015 break;
35016 default_version_info = default_version_info->next;
35019 /* If there is no default node, just return NULL. */
35020 if (default_version_info == NULL)
35021 return NULL;
35023 /* Make default info the first node. */
35024 if (first_v != default_version_info)
35026 default_version_info->prev->next = default_version_info->next;
35027 if (default_version_info->next)
35028 default_version_info->next->prev = default_version_info->prev;
35029 first_v->prev = default_version_info;
35030 default_version_info->next = first_v;
35031 default_version_info->prev = NULL;
35034 default_node = default_version_info->this_node;
35036 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35037 if (targetm.has_ifunc_p ())
35039 struct cgraph_function_version_info *it_v = NULL;
35040 struct cgraph_node *dispatcher_node = NULL;
35041 struct cgraph_function_version_info *dispatcher_version_info = NULL;
35043 /* Right now, the dispatching is done via ifunc. */
35044 dispatch_decl = make_dispatcher_decl (default_node->decl);
35046 dispatcher_node = cgraph_node::get_create (dispatch_decl);
35047 gcc_assert (dispatcher_node != NULL);
35048 dispatcher_node->dispatcher_function = 1;
35049 dispatcher_version_info
35050 = dispatcher_node->insert_new_function_version ();
35051 dispatcher_version_info->next = default_version_info;
35052 dispatcher_node->definition = 1;
35054 /* Set the dispatcher for all the versions. */
35055 it_v = default_version_info;
35056 while (it_v != NULL)
35058 it_v->dispatcher_resolver = dispatch_decl;
35059 it_v = it_v->next;
35062 else
35063 #endif
35065 error_at (DECL_SOURCE_LOCATION (default_node->decl),
35066 "multiversioning needs ifunc which is not supported "
35067 "on this target");
35070 return dispatch_decl;
35073 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
35074 it to CHAIN. */
35076 static tree
35077 make_attribute (const char *name, const char *arg_name, tree chain)
35079 tree attr_name;
35080 tree attr_arg_name;
35081 tree attr_args;
35082 tree attr;
35084 attr_name = get_identifier (name);
35085 attr_arg_name = build_string (strlen (arg_name), arg_name);
35086 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35087 attr = tree_cons (attr_name, attr_args, chain);
35088 return attr;
35091 /* Make the resolver function decl to dispatch the versions of
35092 a multi-versioned function, DEFAULT_DECL. Create an
35093 empty basic block in the resolver and store the pointer in
35094 EMPTY_BB. Return the decl of the resolver function. */
35096 static tree
35097 make_resolver_func (const tree default_decl,
35098 const tree dispatch_decl,
35099 basic_block *empty_bb)
35101 char *resolver_name;
35102 tree decl, type, decl_name, t;
35103 bool is_uniq = false;
35105 /* IFUNCs have to be globally visible. So, if the default_decl is
35106 not, then the name of the IFUNC should be made unique. */
35107 if (TREE_PUBLIC (default_decl) == 0)
35108 is_uniq = true;
35110 /* Append the filename to the resolver function if the versions are
35111 not externally visible. This is because the resolver function has
35112 to be externally visible for the loader to find it. So, appending
35113 the filename will prevent conflicts with a resolver function from
35114 another module which is based on the same version name. */
35115 resolver_name = make_name (default_decl, "resolver", is_uniq);
35117 /* The resolver function should return a (void *). */
35118 type = build_function_type_list (ptr_type_node, NULL_TREE);
35120 decl = build_fn_decl (resolver_name, type);
35121 decl_name = get_identifier (resolver_name);
35122 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35124 DECL_NAME (decl) = decl_name;
35125 TREE_USED (decl) = 1;
35126 DECL_ARTIFICIAL (decl) = 1;
35127 DECL_IGNORED_P (decl) = 0;
35128 /* IFUNC resolvers have to be externally visible. */
35129 TREE_PUBLIC (decl) = 1;
35130 DECL_UNINLINABLE (decl) = 1;
35132 /* Resolver is not external, body is generated. */
35133 DECL_EXTERNAL (decl) = 0;
35134 DECL_EXTERNAL (dispatch_decl) = 0;
35136 DECL_CONTEXT (decl) = NULL_TREE;
35137 DECL_INITIAL (decl) = make_node (BLOCK);
35138 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35140 if (DECL_COMDAT_GROUP (default_decl)
35141 || TREE_PUBLIC (default_decl))
35143 /* In this case, each translation unit with a call to this
35144 versioned function will put out a resolver. Ensure it
35145 is comdat to keep just one copy. */
35146 DECL_COMDAT (decl) = 1;
35147 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35149 /* Build result decl and add to function_decl. */
35150 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35151 DECL_ARTIFICIAL (t) = 1;
35152 DECL_IGNORED_P (t) = 1;
35153 DECL_RESULT (decl) = t;
35155 gimplify_function_tree (decl);
35156 push_cfun (DECL_STRUCT_FUNCTION (decl));
35157 *empty_bb = init_lowered_empty_function (decl, false, 0);
35159 cgraph_node::add_new_function (decl, true);
35160 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35162 pop_cfun ();
35164 gcc_assert (dispatch_decl != NULL);
35165 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35166 DECL_ATTRIBUTES (dispatch_decl)
35167 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35169 /* Create the alias for dispatch to resolver here. */
35170 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35171 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35172 XDELETEVEC (resolver_name);
35173 return decl;
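/* The net effect (a sketch): the dispatcher declaration is marked as if the
   user had written

     int foo (void) __attribute__ ((ifunc ("foo.resolver")));

   so the dynamic loader invokes the resolver once at load time to choose
   which version the dispatcher symbol will point to.  */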
35176 /* Generate the dispatching code body to dispatch multi-versioned function
35177 DECL. The target hook is called to process the "target" attributes and
35178 provide the code to dispatch the right function at run-time. NODE points
35179 to the dispatcher decl whose body will be created. */
35181 static tree
35182 ix86_generate_version_dispatcher_body (void *node_p)
35184 tree resolver_decl;
35185 basic_block empty_bb;
35186 tree default_ver_decl;
35187 struct cgraph_node *versn;
35188 struct cgraph_node *node;
35190 struct cgraph_function_version_info *node_version_info = NULL;
35191 struct cgraph_function_version_info *versn_info = NULL;
35193 node = (cgraph_node *)node_p;
35195 node_version_info = node->function_version ();
35196 gcc_assert (node->dispatcher_function
35197 && node_version_info != NULL);
35199 if (node_version_info->dispatcher_resolver)
35200 return node_version_info->dispatcher_resolver;
35202 /* The first version in the chain corresponds to the default version. */
35203 default_ver_decl = node_version_info->next->this_node->decl;
35205 /* node is going to be an alias, so remove the finalized bit. */
35206 node->definition = false;
35208 resolver_decl = make_resolver_func (default_ver_decl,
35209 node->decl, &empty_bb);
35211 node_version_info->dispatcher_resolver = resolver_decl;
35213 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
35215 auto_vec<tree, 2> fn_ver_vec;
35217 for (versn_info = node_version_info->next; versn_info;
35218 versn_info = versn_info->next)
35220 versn = versn_info->this_node;
35221 /* Check for virtual functions here again, as by this time it should
35222 have been determined if this function needs a vtable index or
35223 not. This happens for methods in derived classes that override
35224 virtual methods in base classes but are not explicitly marked as
35225 virtual. */
35226 if (DECL_VINDEX (versn->decl))
35227 sorry ("Virtual function multiversioning not supported");
35229 fn_ver_vec.safe_push (versn->decl);
35232 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35233 cgraph_edge::rebuild_edges ();
35234 pop_cfun ();
35235 return resolver_decl;
35237 /* This builds the processor_model struct type defined in
35238 libgcc/config/i386/cpuinfo.c */
35240 static tree
35241 build_processor_model_struct (void)
35243 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35244 "__cpu_features"};
35245 tree field = NULL_TREE, field_chain = NULL_TREE;
35246 int i;
35247 tree type = make_node (RECORD_TYPE);
35249 /* The first 3 fields are unsigned int. */
35250 for (i = 0; i < 3; ++i)
35252 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35253 get_identifier (field_name[i]), unsigned_type_node);
35254 if (field_chain != NULL_TREE)
35255 DECL_CHAIN (field) = field_chain;
35256 field_chain = field;
35259 /* The last field is an array of unsigned integers of size one. */
35260 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35261 get_identifier (field_name[3]),
35262 build_array_type (unsigned_type_node,
35263 build_index_type (size_one_node)));
35264 if (field_chain != NULL_TREE)
35265 DECL_CHAIN (field) = field_chain;
35266 field_chain = field;
35268 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35269 return type;
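/* The record built above mirrors the following definition from
   libgcc/config/i386/cpuinfo.c (shown here as a sketch):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */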
35272 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
35274 static tree
35275 make_var_decl (tree type, const char *name)
35277 tree new_decl;
35279 new_decl = build_decl (UNKNOWN_LOCATION,
35280 VAR_DECL,
35281 get_identifier(name),
35282 type);
35284 DECL_EXTERNAL (new_decl) = 1;
35285 TREE_STATIC (new_decl) = 1;
35286 TREE_PUBLIC (new_decl) = 1;
35287 DECL_INITIAL (new_decl) = 0;
35288 DECL_ARTIFICIAL (new_decl) = 0;
35289 DECL_PRESERVE_P (new_decl) = 1;
35291 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35292 assemble_variable (new_decl, 0, 0, 0);
35294 return new_decl;
35297 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35298 into an integer defined in libgcc/config/i386/cpuinfo.c */
35300 static tree
35301 fold_builtin_cpu (tree fndecl, tree *args)
35303 unsigned int i;
35304 enum ix86_builtins fn_code = (enum ix86_builtins)
35305 DECL_FUNCTION_CODE (fndecl);
35306 tree param_string_cst = NULL;
35308 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35309 enum processor_features
35311 F_CMOV = 0,
35312 F_MMX,
35313 F_POPCNT,
35314 F_SSE,
35315 F_SSE2,
35316 F_SSE3,
35317 F_SSSE3,
35318 F_SSE4_1,
35319 F_SSE4_2,
35320 F_AVX,
35321 F_AVX2,
35322 F_SSE4_A,
35323 F_FMA4,
35324 F_XOP,
35325 F_FMA,
35326 F_AVX512F,
35327 F_BMI,
35328 F_BMI2,
35329 F_MAX
35332 /* These are the values for vendor types and cpu types and subtypes
35333 in cpuinfo.c. Cpu types and subtypes should be subtracted by
35334 the corresponding start value. */
35335 enum processor_model
35337 M_INTEL = 1,
35338 M_AMD,
35339 M_CPU_TYPE_START,
35340 M_INTEL_BONNELL,
35341 M_INTEL_CORE2,
35342 M_INTEL_COREI7,
35343 M_AMDFAM10H,
35344 M_AMDFAM15H,
35345 M_INTEL_SILVERMONT,
35346 M_INTEL_KNL,
35347 M_AMD_BTVER1,
35348 M_AMD_BTVER2,
35349 M_CPU_SUBTYPE_START,
35350 M_INTEL_COREI7_NEHALEM,
35351 M_INTEL_COREI7_WESTMERE,
35352 M_INTEL_COREI7_SANDYBRIDGE,
35353 M_AMDFAM10H_BARCELONA,
35354 M_AMDFAM10H_SHANGHAI,
35355 M_AMDFAM10H_ISTANBUL,
35356 M_AMDFAM15H_BDVER1,
35357 M_AMDFAM15H_BDVER2,
35358 M_AMDFAM15H_BDVER3,
35359 M_AMDFAM15H_BDVER4,
35360 M_INTEL_COREI7_IVYBRIDGE,
35361 M_INTEL_COREI7_HASWELL,
35362 M_INTEL_COREI7_BROADWELL
35365 static struct _arch_names_table
35367 const char *const name;
35368 const enum processor_model model;
35370 const arch_names_table[] =
35372 {"amd", M_AMD},
35373 {"intel", M_INTEL},
35374 {"atom", M_INTEL_BONNELL},
35375 {"slm", M_INTEL_SILVERMONT},
35376 {"core2", M_INTEL_CORE2},
35377 {"corei7", M_INTEL_COREI7},
35378 {"nehalem", M_INTEL_COREI7_NEHALEM},
35379 {"westmere", M_INTEL_COREI7_WESTMERE},
35380 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35381 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35382 {"haswell", M_INTEL_COREI7_HASWELL},
35383 {"broadwell", M_INTEL_COREI7_BROADWELL},
35384 {"bonnell", M_INTEL_BONNELL},
35385 {"silvermont", M_INTEL_SILVERMONT},
35386 {"knl", M_INTEL_KNL},
35387 {"amdfam10h", M_AMDFAM10H},
35388 {"barcelona", M_AMDFAM10H_BARCELONA},
35389 {"shanghai", M_AMDFAM10H_SHANGHAI},
35390 {"istanbul", M_AMDFAM10H_ISTANBUL},
35391 {"btver1", M_AMD_BTVER1},
35392 {"amdfam15h", M_AMDFAM15H},
35393 {"bdver1", M_AMDFAM15H_BDVER1},
35394 {"bdver2", M_AMDFAM15H_BDVER2},
35395 {"bdver3", M_AMDFAM15H_BDVER3},
35396 {"bdver4", M_AMDFAM15H_BDVER4},
35397 {"btver2", M_AMD_BTVER2},
35400 static struct _isa_names_table
35402 const char *const name;
35403 const enum processor_features feature;
35405 const isa_names_table[] =
35407 {"cmov", F_CMOV},
35408 {"mmx", F_MMX},
35409 {"popcnt", F_POPCNT},
35410 {"sse", F_SSE},
35411 {"sse2", F_SSE2},
35412 {"sse3", F_SSE3},
35413 {"ssse3", F_SSSE3},
35414 {"sse4a", F_SSE4_A},
35415 {"sse4.1", F_SSE4_1},
35416 {"sse4.2", F_SSE4_2},
35417 {"avx", F_AVX},
35418 {"fma4", F_FMA4},
35419 {"xop", F_XOP},
35420 {"fma", F_FMA},
35421 {"avx2", F_AVX2},
35422 {"avx512f",F_AVX512F},
35423 {"bmi", F_BMI},
35424 {"bmi2", F_BMI2}
35427 tree __processor_model_type = build_processor_model_struct ();
35428 tree __cpu_model_var = make_var_decl (__processor_model_type,
35429 "__cpu_model");
35432 varpool_node::add (__cpu_model_var);
35434 gcc_assert ((args != NULL) && (*args != NULL));
35436 param_string_cst = *args;
35437 while (param_string_cst
35438 && TREE_CODE (param_string_cst) != STRING_CST)
35440 /* *args must be an expr that can contain other EXPRS leading to a
35441 STRING_CST. */
35442 if (!EXPR_P (param_string_cst))
35444 error ("Parameter to builtin must be a string constant or literal");
35445 return integer_zero_node;
35447 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35450 gcc_assert (param_string_cst);
35452 if (fn_code == IX86_BUILTIN_CPU_IS)
35454 tree ref;
35455 tree field;
35456 tree final;
35458 unsigned int field_val = 0;
35459 unsigned int NUM_ARCH_NAMES
35460 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35462 for (i = 0; i < NUM_ARCH_NAMES; i++)
35463 if (strcmp (arch_names_table[i].name,
35464 TREE_STRING_POINTER (param_string_cst)) == 0)
35465 break;
35467 if (i == NUM_ARCH_NAMES)
35469 error ("Parameter to builtin not valid: %s",
35470 TREE_STRING_POINTER (param_string_cst));
35471 return integer_zero_node;
35474 field = TYPE_FIELDS (__processor_model_type);
35475 field_val = arch_names_table[i].model;
35477 /* CPU types are stored in the next field. */
35478 if (field_val > M_CPU_TYPE_START
35479 && field_val < M_CPU_SUBTYPE_START)
35481 field = DECL_CHAIN (field);
35482 field_val -= M_CPU_TYPE_START;
35485 /* CPU subtypes are stored in the next field. */
35486 if (field_val > M_CPU_SUBTYPE_START)
35488 field = DECL_CHAIN ( DECL_CHAIN (field));
35489 field_val -= M_CPU_SUBTYPE_START;
35492 /* Get the appropriate field in __cpu_model. */
35493 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35494 field, NULL_TREE);
35496 /* Check the value. */
35497 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35498 build_int_cstu (unsigned_type_node, field_val));
35499 return build1 (CONVERT_EXPR, integer_type_node, final);
35501 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35503 tree ref;
35504 tree array_elt;
35505 tree field;
35506 tree final;
35508 unsigned int field_val = 0;
35509 unsigned int NUM_ISA_NAMES
35510 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35512 for (i = 0; i < NUM_ISA_NAMES; i++)
35513 if (strcmp (isa_names_table[i].name,
35514 TREE_STRING_POINTER (param_string_cst)) == 0)
35515 break;
35517 if (i == NUM_ISA_NAMES)
35519 error ("Parameter to builtin not valid: %s",
35520 TREE_STRING_POINTER (param_string_cst));
35521 return integer_zero_node;
35524 field = TYPE_FIELDS (__processor_model_type);
35525 /* Get the last field, which is __cpu_features. */
35526 while (DECL_CHAIN (field))
35527 field = DECL_CHAIN (field);
35529 /* Get the appropriate field: __cpu_model.__cpu_features */
35530 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35531 field, NULL_TREE);
35533 /* Access the 0th element of __cpu_features array. */
35534 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35535 integer_zero_node, NULL_TREE, NULL_TREE);
35537 field_val = (1 << isa_names_table[i].feature);
35538 /* Return __cpu_model.__cpu_features[0] & field_val */
35539 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35540 build_int_cstu (unsigned_type_node, field_val));
35541 return build1 (CONVERT_EXPR, integer_type_node, final);
35543 gcc_unreachable ();
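/* Examples of the folding performed above (a sketch):

     __builtin_cpu_is ("intel")
       -> __cpu_model.__cpu_vendor == M_INTEL
     __builtin_cpu_is ("haswell")
       -> __cpu_model.__cpu_subtype == M_INTEL_COREI7_HASWELL - M_CPU_SUBTYPE_START
     __builtin_cpu_supports ("avx2")
       -> __cpu_model.__cpu_features[0] & (1 << F_AVX2)  */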
35546 static tree
35547 ix86_fold_builtin (tree fndecl, int n_args,
35548 tree *args, bool ignore ATTRIBUTE_UNUSED)
35550 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35552 enum ix86_builtins fn_code = (enum ix86_builtins)
35553 DECL_FUNCTION_CODE (fndecl);
35554 if (fn_code == IX86_BUILTIN_CPU_IS
35555 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35557 gcc_assert (n_args == 1);
35558 return fold_builtin_cpu (fndecl, args);
35562 #ifdef SUBTARGET_FOLD_BUILTIN
35563 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35564 #endif
35566 return NULL_TREE;
35569 /* Make builtins to detect cpu type and features supported. NAME is
35570 the builtin name, CODE is the builtin code, and FTYPE is the function
35571 type of the builtin. */
35573 static void
35574 make_cpu_type_builtin (const char* name, int code,
35575 enum ix86_builtin_func_type ftype, bool is_const)
35577 tree decl;
35578 tree type;
35580 type = ix86_get_builtin_func_type (ftype);
35581 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35582 NULL, NULL_TREE);
35583 gcc_assert (decl != NULL_TREE);
35584 ix86_builtins[(int) code] = decl;
35585 TREE_READONLY (decl) = is_const;
35588 /* Make builtins to get CPU type and features supported. The created
35589 builtins are:
35591 __builtin_cpu_init (), to detect cpu type and features,
35592 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35593 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
35596 static void
35597 ix86_init_platform_type_builtins (void)
35599 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35600 INT_FTYPE_VOID, false);
35601 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35602 INT_FTYPE_PCCHAR, true);
35603 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35604 INT_FTYPE_PCCHAR, true);
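/* Typical user-level use of these builtins (a sketch):

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("corei7"))
       use_corei7_tuned_code ();
     if (__builtin_cpu_supports ("sse4.2"))
       use_sse42_code ();

   use_corei7_tuned_code and use_sse42_code are hypothetical names used only
   for illustration.  */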
35607 /* Internal method for ix86_init_builtins. */
35609 static void
35610 ix86_init_builtins_va_builtins_abi (void)
35612 tree ms_va_ref, sysv_va_ref;
35613 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35614 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35615 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35616 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35618 if (!TARGET_64BIT)
35619 return;
35620 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35621 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35622 ms_va_ref = build_reference_type (ms_va_list_type_node);
35623 sysv_va_ref =
35624 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35626 fnvoid_va_end_ms =
35627 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35628 fnvoid_va_start_ms =
35629 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35630 fnvoid_va_end_sysv =
35631 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35632 fnvoid_va_start_sysv =
35633 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35634 NULL_TREE);
35635 fnvoid_va_copy_ms =
35636 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35637 NULL_TREE);
35638 fnvoid_va_copy_sysv =
35639 build_function_type_list (void_type_node, sysv_va_ref,
35640 sysv_va_ref, NULL_TREE);
35642 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35643 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35644 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35645 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35646 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35647 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35648 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35649 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35650 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35651 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35652 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35653 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
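/* These make it possible, for example (a sketch), to write an ms_abi
   varargs function in a compilation unit whose default is the SysV ABI:

     int __attribute__ ((ms_abi)) first_arg (int n, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, n);
       int v = __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return v;
     }

   first_arg is a hypothetical name used only for illustration.  */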
35656 static void
35657 ix86_init_builtin_types (void)
35659 tree float128_type_node, float80_type_node;
35661 /* The __float80 type. */
35662 float80_type_node = long_double_type_node;
35663 if (TYPE_MODE (float80_type_node) != XFmode)
35665 /* The __float80 type. */
35666 float80_type_node = make_node (REAL_TYPE);
35668 TYPE_PRECISION (float80_type_node) = 80;
35669 layout_type (float80_type_node);
35671 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35673 /* The __float128 type. */
35674 float128_type_node = make_node (REAL_TYPE);
35675 TYPE_PRECISION (float128_type_node) = 128;
35676 layout_type (float128_type_node);
35677 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35679 /* This macro is built by i386-builtin-types.awk. */
35680 DEFINE_BUILTIN_PRIMITIVE_TYPES;
35683 static void
35684 ix86_init_builtins (void)
35686 tree t;
35688 ix86_init_builtin_types ();
35690 /* Builtins to get CPU type and features. */
35691 ix86_init_platform_type_builtins ();
35693 /* TFmode support builtins. */
35694 def_builtin_const (0, "__builtin_infq",
35695 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35696 def_builtin_const (0, "__builtin_huge_valq",
35697 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35699 /* We will expand them to normal call if SSE isn't available since
35700 they are used by libgcc. */
35701 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35702 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35703 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35704 TREE_READONLY (t) = 1;
35705 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35707 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35708 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35709 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35710 TREE_READONLY (t) = 1;
35711 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35713 ix86_init_tm_builtins ();
35714 ix86_init_mmx_sse_builtins ();
35715 ix86_init_mpx_builtins ();
35717 if (TARGET_LP64)
35718 ix86_init_builtins_va_builtins_abi ();
35720 #ifdef SUBTARGET_INIT_BUILTINS
35721 SUBTARGET_INIT_BUILTINS;
35722 #endif
35725 /* Return the ix86 builtin for CODE. */
35727 static tree
35728 ix86_builtin_decl (unsigned code, bool)
35730 if (code >= IX86_BUILTIN_MAX)
35731 return error_mark_node;
35733 return ix86_builtins[code];
35736 /* Errors in the source file can cause expand_expr to return const0_rtx
35737 where we expect a vector. To avoid crashing, use one of the vector
35738 clear instructions. */
35739 static rtx
35740 safe_vector_operand (rtx x, machine_mode mode)
35742 if (x == const0_rtx)
35743 x = CONST0_RTX (mode);
35744 return x;
35747 /* Fixup modeless constants to fit required mode. */
35748 static rtx
35749 fixup_modeless_constant (rtx x, machine_mode mode)
35751 if (GET_MODE (x) == VOIDmode)
35752 x = convert_to_mode (mode, x, 1);
35753 return x;
35756 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
35758 static rtx
35759 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35761 rtx pat;
35762 tree arg0 = CALL_EXPR_ARG (exp, 0);
35763 tree arg1 = CALL_EXPR_ARG (exp, 1);
35764 rtx op0 = expand_normal (arg0);
35765 rtx op1 = expand_normal (arg1);
35766 machine_mode tmode = insn_data[icode].operand[0].mode;
35767 machine_mode mode0 = insn_data[icode].operand[1].mode;
35768 machine_mode mode1 = insn_data[icode].operand[2].mode;
35770 if (VECTOR_MODE_P (mode0))
35771 op0 = safe_vector_operand (op0, mode0);
35772 if (VECTOR_MODE_P (mode1))
35773 op1 = safe_vector_operand (op1, mode1);
35775 if (optimize || !target
35776 || GET_MODE (target) != tmode
35777 || !insn_data[icode].operand[0].predicate (target, tmode))
35778 target = gen_reg_rtx (tmode);
35780 if (GET_MODE (op1) == SImode && mode1 == TImode)
35782 rtx x = gen_reg_rtx (V4SImode);
35783 emit_insn (gen_sse2_loadd (x, op1));
35784 op1 = gen_lowpart (TImode, x);
35787 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35788 op0 = copy_to_mode_reg (mode0, op0);
35789 if (!insn_data[icode].operand[2].predicate (op1, mode1))
35790 op1 = copy_to_mode_reg (mode1, op1);
35792 pat = GEN_FCN (icode) (target, op0, op1);
35793 if (! pat)
35794 return 0;
35796 emit_insn (pat);
35798 return target;
35801 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
35803 static rtx
35804 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
35805 enum ix86_builtin_func_type m_type,
35806 enum rtx_code sub_code)
35808 rtx pat;
35809 int i;
35810 int nargs;
35811 bool comparison_p = false;
35812 bool tf_p = false;
35813 bool last_arg_constant = false;
35814 int num_memory = 0;
35815 struct {
35816 rtx op;
35817 machine_mode mode;
35818 } args[4];
35820 machine_mode tmode = insn_data[icode].operand[0].mode;
35822 switch (m_type)
35824 case MULTI_ARG_4_DF2_DI_I:
35825 case MULTI_ARG_4_DF2_DI_I1:
35826 case MULTI_ARG_4_SF2_SI_I:
35827 case MULTI_ARG_4_SF2_SI_I1:
35828 nargs = 4;
35829 last_arg_constant = true;
35830 break;
35832 case MULTI_ARG_3_SF:
35833 case MULTI_ARG_3_DF:
35834 case MULTI_ARG_3_SF2:
35835 case MULTI_ARG_3_DF2:
35836 case MULTI_ARG_3_DI:
35837 case MULTI_ARG_3_SI:
35838 case MULTI_ARG_3_SI_DI:
35839 case MULTI_ARG_3_HI:
35840 case MULTI_ARG_3_HI_SI:
35841 case MULTI_ARG_3_QI:
35842 case MULTI_ARG_3_DI2:
35843 case MULTI_ARG_3_SI2:
35844 case MULTI_ARG_3_HI2:
35845 case MULTI_ARG_3_QI2:
35846 nargs = 3;
35847 break;
35849 case MULTI_ARG_2_SF:
35850 case MULTI_ARG_2_DF:
35851 case MULTI_ARG_2_DI:
35852 case MULTI_ARG_2_SI:
35853 case MULTI_ARG_2_HI:
35854 case MULTI_ARG_2_QI:
35855 nargs = 2;
35856 break;
35858 case MULTI_ARG_2_DI_IMM:
35859 case MULTI_ARG_2_SI_IMM:
35860 case MULTI_ARG_2_HI_IMM:
35861 case MULTI_ARG_2_QI_IMM:
35862 nargs = 2;
35863 last_arg_constant = true;
35864 break;
35866 case MULTI_ARG_1_SF:
35867 case MULTI_ARG_1_DF:
35868 case MULTI_ARG_1_SF2:
35869 case MULTI_ARG_1_DF2:
35870 case MULTI_ARG_1_DI:
35871 case MULTI_ARG_1_SI:
35872 case MULTI_ARG_1_HI:
35873 case MULTI_ARG_1_QI:
35874 case MULTI_ARG_1_SI_DI:
35875 case MULTI_ARG_1_HI_DI:
35876 case MULTI_ARG_1_HI_SI:
35877 case MULTI_ARG_1_QI_DI:
35878 case MULTI_ARG_1_QI_SI:
35879 case MULTI_ARG_1_QI_HI:
35880 nargs = 1;
35881 break;
35883 case MULTI_ARG_2_DI_CMP:
35884 case MULTI_ARG_2_SI_CMP:
35885 case MULTI_ARG_2_HI_CMP:
35886 case MULTI_ARG_2_QI_CMP:
35887 nargs = 2;
35888 comparison_p = true;
35889 break;
35891 case MULTI_ARG_2_SF_TF:
35892 case MULTI_ARG_2_DF_TF:
35893 case MULTI_ARG_2_DI_TF:
35894 case MULTI_ARG_2_SI_TF:
35895 case MULTI_ARG_2_HI_TF:
35896 case MULTI_ARG_2_QI_TF:
35897 nargs = 2;
35898 tf_p = true;
35899 break;
35901 default:
35902 gcc_unreachable ();
35905 if (optimize || !target
35906 || GET_MODE (target) != tmode
35907 || !insn_data[icode].operand[0].predicate (target, tmode))
35908 target = gen_reg_rtx (tmode);
35910 gcc_assert (nargs <= 4);
35912 for (i = 0; i < nargs; i++)
35914 tree arg = CALL_EXPR_ARG (exp, i);
35915 rtx op = expand_normal (arg);
35916 int adjust = (comparison_p) ? 1 : 0;
35917 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
35919 if (last_arg_constant && i == nargs - 1)
35921 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
35923 enum insn_code new_icode = icode;
35924 switch (icode)
35926 case CODE_FOR_xop_vpermil2v2df3:
35927 case CODE_FOR_xop_vpermil2v4sf3:
35928 case CODE_FOR_xop_vpermil2v4df3:
35929 case CODE_FOR_xop_vpermil2v8sf3:
35930 error ("the last argument must be a 2-bit immediate");
35931 return gen_reg_rtx (tmode);
35932 case CODE_FOR_xop_rotlv2di3:
35933 new_icode = CODE_FOR_rotlv2di3;
35934 goto xop_rotl;
35935 case CODE_FOR_xop_rotlv4si3:
35936 new_icode = CODE_FOR_rotlv4si3;
35937 goto xop_rotl;
35938 case CODE_FOR_xop_rotlv8hi3:
35939 new_icode = CODE_FOR_rotlv8hi3;
35940 goto xop_rotl;
35941 case CODE_FOR_xop_rotlv16qi3:
35942 new_icode = CODE_FOR_rotlv16qi3;
35943 xop_rotl:
35944 if (CONST_INT_P (op))
35946 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
35947 op = GEN_INT (INTVAL (op) & mask);
35948 gcc_checking_assert
35949 (insn_data[icode].operand[i + 1].predicate (op, mode));
35951 else
35953 gcc_checking_assert
35954 (nargs == 2
35955 && insn_data[new_icode].operand[0].mode == tmode
35956 && insn_data[new_icode].operand[1].mode == tmode
35957 && insn_data[new_icode].operand[2].mode == mode
35958 && insn_data[new_icode].operand[0].predicate
35959 == insn_data[icode].operand[0].predicate
35960 && insn_data[new_icode].operand[1].predicate
35961 == insn_data[icode].operand[1].predicate);
35962 icode = new_icode;
35963 goto non_constant;
35965 break;
35966 default:
35967 gcc_unreachable ();
35971 else
35973 non_constant:
35974 if (VECTOR_MODE_P (mode))
35975 op = safe_vector_operand (op, mode);
35977 /* If we aren't optimizing, only allow one memory operand to be
35978 generated. */
35979 if (memory_operand (op, mode))
35980 num_memory++;
35982 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
35984 if (optimize
35985 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
35986 || num_memory > 1)
35987 op = force_reg (mode, op);
35990 args[i].op = op;
35991 args[i].mode = mode;
35994 switch (nargs)
35996 case 1:
35997 pat = GEN_FCN (icode) (target, args[0].op);
35998 break;
36000 case 2:
36001 if (tf_p)
36002 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
36003 GEN_INT ((int)sub_code));
36004 else if (! comparison_p)
36005 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36006 else
36008 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
36009 args[0].op,
36010 args[1].op);
36012 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
36014 break;
36016 case 3:
36017 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36018 break;
36020 case 4:
36021 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
36022 break;
36024 default:
36025 gcc_unreachable ();
36028 if (! pat)
36029 return 0;
36031 emit_insn (pat);
36032 return target;
36035 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
36036 insns with vec_merge. */
36038 static rtx
36039 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
36040 rtx target)
36042 rtx pat;
36043 tree arg0 = CALL_EXPR_ARG (exp, 0);
36044 rtx op1, op0 = expand_normal (arg0);
36045 machine_mode tmode = insn_data[icode].operand[0].mode;
36046 machine_mode mode0 = insn_data[icode].operand[1].mode;
36048 if (optimize || !target
36049 || GET_MODE (target) != tmode
36050 || !insn_data[icode].operand[0].predicate (target, tmode))
36051 target = gen_reg_rtx (tmode);
36053 if (VECTOR_MODE_P (mode0))
36054 op0 = safe_vector_operand (op0, mode0);
36056 if ((optimize && !register_operand (op0, mode0))
36057 || !insn_data[icode].operand[1].predicate (op0, mode0))
36058 op0 = copy_to_mode_reg (mode0, op0);
36060 op1 = op0;
36061 if (!insn_data[icode].operand[2].predicate (op1, mode0))
36062 op1 = copy_to_mode_reg (mode0, op1);
36064 pat = GEN_FCN (icode) (target, op0, op1);
36065 if (! pat)
36066 return 0;
36067 emit_insn (pat);
36068 return target;
36071 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
36073 static rtx
36074 ix86_expand_sse_compare (const struct builtin_description *d,
36075 tree exp, rtx target, bool swap)
36077 rtx pat;
36078 tree arg0 = CALL_EXPR_ARG (exp, 0);
36079 tree arg1 = CALL_EXPR_ARG (exp, 1);
36080 rtx op0 = expand_normal (arg0);
36081 rtx op1 = expand_normal (arg1);
36082 rtx op2;
36083 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36084 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36085 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36086 enum rtx_code comparison = d->comparison;
36088 if (VECTOR_MODE_P (mode0))
36089 op0 = safe_vector_operand (op0, mode0);
36090 if (VECTOR_MODE_P (mode1))
36091 op1 = safe_vector_operand (op1, mode1);
36093 /* Swap operands if we have a comparison that isn't available in
36094 hardware. */
36095 if (swap)
36096 std::swap (op0, op1);
36098 if (optimize || !target
36099 || GET_MODE (target) != tmode
36100 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36101 target = gen_reg_rtx (tmode);
36103 if ((optimize && !register_operand (op0, mode0))
36104 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36105 op0 = copy_to_mode_reg (mode0, op0);
36106 if ((optimize && !register_operand (op1, mode1))
36107 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36108 op1 = copy_to_mode_reg (mode1, op1);
36110 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36111 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36112 if (! pat)
36113 return 0;
36114 emit_insn (pat);
36115 return target;
36118 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
36120 static rtx
36121 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36122 rtx target)
36124 rtx pat;
36125 tree arg0 = CALL_EXPR_ARG (exp, 0);
36126 tree arg1 = CALL_EXPR_ARG (exp, 1);
36127 rtx op0 = expand_normal (arg0);
36128 rtx op1 = expand_normal (arg1);
36129 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36130 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36131 enum rtx_code comparison = d->comparison;
36133 if (VECTOR_MODE_P (mode0))
36134 op0 = safe_vector_operand (op0, mode0);
36135 if (VECTOR_MODE_P (mode1))
36136 op1 = safe_vector_operand (op1, mode1);
36138 /* Swap operands if we have a comparison that isn't available in
36139 hardware. */
36140 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36141 std::swap (op0, op1);
36143 target = gen_reg_rtx (SImode);
36144 emit_move_insn (target, const0_rtx);
36145 target = gen_rtx_SUBREG (QImode, target, 0);
36147 if ((optimize && !register_operand (op0, mode0))
36148 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36149 op0 = copy_to_mode_reg (mode0, op0);
36150 if ((optimize && !register_operand (op1, mode1))
36151 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36152 op1 = copy_to_mode_reg (mode1, op1);
36154 pat = GEN_FCN (d->icode) (op0, op1);
36155 if (! pat)
36156 return 0;
36157 emit_insn (pat);
36158 emit_insn (gen_rtx_SET (VOIDmode,
36159 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36160 gen_rtx_fmt_ee (comparison, QImode,
36161 SET_DEST (pat),
36162 const0_rtx)));
36164 return SUBREG_REG (target);
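/* The sequence emitted above (a sketch): the comi pattern compares OP0 and
   OP1 and sets FLAGS_REG; the flags are then tested with D->COMPARISON and
   the 0/1 result is stored into the low QImode part of a fresh SImode
   register, whose SImode value is returned as the builtin's result.  */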
36167 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
36169 static rtx
36170 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36171 rtx target)
36173 rtx pat;
36174 tree arg0 = CALL_EXPR_ARG (exp, 0);
36175 rtx op1, op0 = expand_normal (arg0);
36176 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36177 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36179 if (optimize || target == 0
36180 || GET_MODE (target) != tmode
36181 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36182 target = gen_reg_rtx (tmode);
36184 if (VECTOR_MODE_P (mode0))
36185 op0 = safe_vector_operand (op0, mode0);
36187 if ((optimize && !register_operand (op0, mode0))
36188 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36189 op0 = copy_to_mode_reg (mode0, op0);
36191 op1 = GEN_INT (d->comparison);
36193 pat = GEN_FCN (d->icode) (target, op0, op1);
36194 if (! pat)
36195 return 0;
36196 emit_insn (pat);
36197 return target;
36200 static rtx
36201 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36202 tree exp, rtx target)
36204 rtx pat;
36205 tree arg0 = CALL_EXPR_ARG (exp, 0);
36206 tree arg1 = CALL_EXPR_ARG (exp, 1);
36207 rtx op0 = expand_normal (arg0);
36208 rtx op1 = expand_normal (arg1);
36209 rtx op2;
36210 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36211 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36212 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36214 if (optimize || target == 0
36215 || GET_MODE (target) != tmode
36216 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36217 target = gen_reg_rtx (tmode);
36219 op0 = safe_vector_operand (op0, mode0);
36220 op1 = safe_vector_operand (op1, mode1);
36222 if ((optimize && !register_operand (op0, mode0))
36223 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36224 op0 = copy_to_mode_reg (mode0, op0);
36225 if ((optimize && !register_operand (op1, mode1))
36226 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36227 op1 = copy_to_mode_reg (mode1, op1);
36229 op2 = GEN_INT (d->comparison);
36231 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36232 if (! pat)
36233 return 0;
36234 emit_insn (pat);
36235 return target;
36238 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
36240 static rtx
36241 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36242 rtx target)
36244 rtx pat;
36245 tree arg0 = CALL_EXPR_ARG (exp, 0);
36246 tree arg1 = CALL_EXPR_ARG (exp, 1);
36247 rtx op0 = expand_normal (arg0);
36248 rtx op1 = expand_normal (arg1);
36249 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36250 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36251 enum rtx_code comparison = d->comparison;
36253 if (VECTOR_MODE_P (mode0))
36254 op0 = safe_vector_operand (op0, mode0);
36255 if (VECTOR_MODE_P (mode1))
36256 op1 = safe_vector_operand (op1, mode1);
36258 target = gen_reg_rtx (SImode);
36259 emit_move_insn (target, const0_rtx);
36260 target = gen_rtx_SUBREG (QImode, target, 0);
36262 if ((optimize && !register_operand (op0, mode0))
36263 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36264 op0 = copy_to_mode_reg (mode0, op0);
36265 if ((optimize && !register_operand (op1, mode1))
36266 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36267 op1 = copy_to_mode_reg (mode1, op1);
36269 pat = GEN_FCN (d->icode) (op0, op1);
36270 if (! pat)
36271 return 0;
36272 emit_insn (pat);
36273 emit_insn (gen_rtx_SET (VOIDmode,
36274 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36275 gen_rtx_fmt_ee (comparison, QImode,
36276 SET_DEST (pat),
36277 const0_rtx)));
36279 return SUBREG_REG (target);
36282 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
36284 static rtx
36285 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36286 tree exp, rtx target)
36288 rtx pat;
36289 tree arg0 = CALL_EXPR_ARG (exp, 0);
36290 tree arg1 = CALL_EXPR_ARG (exp, 1);
36291 tree arg2 = CALL_EXPR_ARG (exp, 2);
36292 tree arg3 = CALL_EXPR_ARG (exp, 3);
36293 tree arg4 = CALL_EXPR_ARG (exp, 4);
36294 rtx scratch0, scratch1;
36295 rtx op0 = expand_normal (arg0);
36296 rtx op1 = expand_normal (arg1);
36297 rtx op2 = expand_normal (arg2);
36298 rtx op3 = expand_normal (arg3);
36299 rtx op4 = expand_normal (arg4);
36300 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36302 tmode0 = insn_data[d->icode].operand[0].mode;
36303 tmode1 = insn_data[d->icode].operand[1].mode;
36304 modev2 = insn_data[d->icode].operand[2].mode;
36305 modei3 = insn_data[d->icode].operand[3].mode;
36306 modev4 = insn_data[d->icode].operand[4].mode;
36307 modei5 = insn_data[d->icode].operand[5].mode;
36308 modeimm = insn_data[d->icode].operand[6].mode;
36310 if (VECTOR_MODE_P (modev2))
36311 op0 = safe_vector_operand (op0, modev2);
36312 if (VECTOR_MODE_P (modev4))
36313 op2 = safe_vector_operand (op2, modev4);
36315 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36316 op0 = copy_to_mode_reg (modev2, op0);
36317 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36318 op1 = copy_to_mode_reg (modei3, op1);
36319 if ((optimize && !register_operand (op2, modev4))
36320 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36321 op2 = copy_to_mode_reg (modev4, op2);
36322 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36323 op3 = copy_to_mode_reg (modei5, op3);
36325 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36327 error ("the fifth argument must be an 8-bit immediate");
36328 return const0_rtx;
36331 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36333 if (optimize || !target
36334 || GET_MODE (target) != tmode0
36335 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36336 target = gen_reg_rtx (tmode0);
36338 scratch1 = gen_reg_rtx (tmode1);
36340 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36342 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36344 if (optimize || !target
36345 || GET_MODE (target) != tmode1
36346 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36347 target = gen_reg_rtx (tmode1);
36349 scratch0 = gen_reg_rtx (tmode0);
36351 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36353 else
36355 gcc_assert (d->flag);
36357 scratch0 = gen_reg_rtx (tmode0);
36358 scratch1 = gen_reg_rtx (tmode1);
36360 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36363 if (! pat)
36364 return 0;
36366 emit_insn (pat);
36368 if (d->flag)
36370 target = gen_reg_rtx (SImode);
36371 emit_move_insn (target, const0_rtx);
36372 target = gen_rtx_SUBREG (QImode, target, 0);
36374 emit_insn
36375 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36376 gen_rtx_fmt_ee (EQ, QImode,
36377 gen_rtx_REG ((machine_mode) d->flag,
36378 FLAGS_REG),
36379 const0_rtx)));
36380 return SUBREG_REG (target);
36382 else
36383 return target;
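/* Illustration (not part of the original sources): the pcmpestr[im]
   expander above backs the SSE4.2 explicit-length string intrinsics.
   A minimal sketch, assuming <nmmintrin.h> and the standard _SIDD_*
   control macros; the control operand must be an 8-bit immediate, as
   diagnosed above:

     #include <nmmintrin.h>

     int first_match (__m128i set, int len_set, __m128i hay, int len_hay)
     {
       return _mm_cmpestri (set, len_set, hay, len_hay,
                            _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);
     }
*/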
36387 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
36389 static rtx
36390 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36391 tree exp, rtx target)
36393 rtx pat;
36394 tree arg0 = CALL_EXPR_ARG (exp, 0);
36395 tree arg1 = CALL_EXPR_ARG (exp, 1);
36396 tree arg2 = CALL_EXPR_ARG (exp, 2);
36397 rtx scratch0, scratch1;
36398 rtx op0 = expand_normal (arg0);
36399 rtx op1 = expand_normal (arg1);
36400 rtx op2 = expand_normal (arg2);
36401 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36403 tmode0 = insn_data[d->icode].operand[0].mode;
36404 tmode1 = insn_data[d->icode].operand[1].mode;
36405 modev2 = insn_data[d->icode].operand[2].mode;
36406 modev3 = insn_data[d->icode].operand[3].mode;
36407 modeimm = insn_data[d->icode].operand[4].mode;
36409 if (VECTOR_MODE_P (modev2))
36410 op0 = safe_vector_operand (op0, modev2);
36411 if (VECTOR_MODE_P (modev3))
36412 op1 = safe_vector_operand (op1, modev3);
36414 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36415 op0 = copy_to_mode_reg (modev2, op0);
36416 if ((optimize && !register_operand (op1, modev3))
36417 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36418 op1 = copy_to_mode_reg (modev3, op1);
36420 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36422 error ("the third argument must be an 8-bit immediate");
36423 return const0_rtx;
36426 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36428 if (optimize || !target
36429 || GET_MODE (target) != tmode0
36430 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36431 target = gen_reg_rtx (tmode0);
36433 scratch1 = gen_reg_rtx (tmode1);
36435 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36437 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36439 if (optimize || !target
36440 || GET_MODE (target) != tmode1
36441 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36442 target = gen_reg_rtx (tmode1);
36444 scratch0 = gen_reg_rtx (tmode0);
36446 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36448 else
36450 gcc_assert (d->flag);
36452 scratch0 = gen_reg_rtx (tmode0);
36453 scratch1 = gen_reg_rtx (tmode1);
36455 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36458 if (! pat)
36459 return 0;
36461 emit_insn (pat);
36463 if (d->flag)
36465 target = gen_reg_rtx (SImode);
36466 emit_move_insn (target, const0_rtx);
36467 target = gen_rtx_SUBREG (QImode, target, 0);
36469 emit_insn
36470 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36471 gen_rtx_fmt_ee (EQ, QImode,
36472 gen_rtx_REG ((machine_mode) d->flag,
36473 FLAGS_REG),
36474 const0_rtx)));
36475 return SUBREG_REG (target);
36477 else
36478 return target;
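/* Illustration (not part of the original sources): the implicit-length
   variant above backs _mm_cmpistri and friends; here the control is the
   third argument and must likewise be an 8-bit immediate.  Sketch,
   assuming <nmmintrin.h>:

     #include <nmmintrin.h>

     int first_in_set (__m128i set, __m128i hay)
     {
       return _mm_cmpistri (set, hay,
                            _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);
     }
*/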
36481 /* Subroutine of ix86_expand_builtin to take care of insns with
36482 variable number of operands. */
36484 static rtx
36485 ix86_expand_args_builtin (const struct builtin_description *d,
36486 tree exp, rtx target)
36488 rtx pat, real_target;
36489 unsigned int i, nargs;
36490 unsigned int nargs_constant = 0;
36491 unsigned int mask_pos = 0;
36492 int num_memory = 0;
36493 struct
36495 rtx op;
36496 machine_mode mode;
36497 } args[6];
36498 bool last_arg_count = false;
36499 enum insn_code icode = d->icode;
36500 const struct insn_data_d *insn_p = &insn_data[icode];
36501 machine_mode tmode = insn_p->operand[0].mode;
36502 machine_mode rmode = VOIDmode;
36503 bool swap = false;
36504 enum rtx_code comparison = d->comparison;
36506 switch ((enum ix86_builtin_func_type) d->flag)
36508 case V2DF_FTYPE_V2DF_ROUND:
36509 case V4DF_FTYPE_V4DF_ROUND:
36510 case V4SF_FTYPE_V4SF_ROUND:
36511 case V8SF_FTYPE_V8SF_ROUND:
36512 case V4SI_FTYPE_V4SF_ROUND:
36513 case V8SI_FTYPE_V8SF_ROUND:
36514 return ix86_expand_sse_round (d, exp, target);
36515 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36516 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36517 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36518 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36519 case INT_FTYPE_V8SF_V8SF_PTEST:
36520 case INT_FTYPE_V4DI_V4DI_PTEST:
36521 case INT_FTYPE_V4DF_V4DF_PTEST:
36522 case INT_FTYPE_V4SF_V4SF_PTEST:
36523 case INT_FTYPE_V2DI_V2DI_PTEST:
36524 case INT_FTYPE_V2DF_V2DF_PTEST:
36525 return ix86_expand_sse_ptest (d, exp, target);
36526 case FLOAT128_FTYPE_FLOAT128:
36527 case FLOAT_FTYPE_FLOAT:
36528 case INT_FTYPE_INT:
36529 case UINT64_FTYPE_INT:
36530 case UINT16_FTYPE_UINT16:
36531 case INT64_FTYPE_INT64:
36532 case INT64_FTYPE_V4SF:
36533 case INT64_FTYPE_V2DF:
36534 case INT_FTYPE_V16QI:
36535 case INT_FTYPE_V8QI:
36536 case INT_FTYPE_V8SF:
36537 case INT_FTYPE_V4DF:
36538 case INT_FTYPE_V4SF:
36539 case INT_FTYPE_V2DF:
36540 case INT_FTYPE_V32QI:
36541 case V16QI_FTYPE_V16QI:
36542 case V8SI_FTYPE_V8SF:
36543 case V8SI_FTYPE_V4SI:
36544 case V8HI_FTYPE_V8HI:
36545 case V8HI_FTYPE_V16QI:
36546 case V8QI_FTYPE_V8QI:
36547 case V8SF_FTYPE_V8SF:
36548 case V8SF_FTYPE_V8SI:
36549 case V8SF_FTYPE_V4SF:
36550 case V8SF_FTYPE_V8HI:
36551 case V4SI_FTYPE_V4SI:
36552 case V4SI_FTYPE_V16QI:
36553 case V4SI_FTYPE_V4SF:
36554 case V4SI_FTYPE_V8SI:
36555 case V4SI_FTYPE_V8HI:
36556 case V4SI_FTYPE_V4DF:
36557 case V4SI_FTYPE_V2DF:
36558 case V4HI_FTYPE_V4HI:
36559 case V4DF_FTYPE_V4DF:
36560 case V4DF_FTYPE_V4SI:
36561 case V4DF_FTYPE_V4SF:
36562 case V4DF_FTYPE_V2DF:
36563 case V4SF_FTYPE_V4SF:
36564 case V4SF_FTYPE_V4SI:
36565 case V4SF_FTYPE_V8SF:
36566 case V4SF_FTYPE_V4DF:
36567 case V4SF_FTYPE_V8HI:
36568 case V4SF_FTYPE_V2DF:
36569 case V2DI_FTYPE_V2DI:
36570 case V2DI_FTYPE_V16QI:
36571 case V2DI_FTYPE_V8HI:
36572 case V2DI_FTYPE_V4SI:
36573 case V2DF_FTYPE_V2DF:
36574 case V2DF_FTYPE_V4SI:
36575 case V2DF_FTYPE_V4DF:
36576 case V2DF_FTYPE_V4SF:
36577 case V2DF_FTYPE_V2SI:
36578 case V2SI_FTYPE_V2SI:
36579 case V2SI_FTYPE_V4SF:
36580 case V2SI_FTYPE_V2SF:
36581 case V2SI_FTYPE_V2DF:
36582 case V2SF_FTYPE_V2SF:
36583 case V2SF_FTYPE_V2SI:
36584 case V32QI_FTYPE_V32QI:
36585 case V32QI_FTYPE_V16QI:
36586 case V16HI_FTYPE_V16HI:
36587 case V16HI_FTYPE_V8HI:
36588 case V8SI_FTYPE_V8SI:
36589 case V16HI_FTYPE_V16QI:
36590 case V8SI_FTYPE_V16QI:
36591 case V4DI_FTYPE_V16QI:
36592 case V8SI_FTYPE_V8HI:
36593 case V4DI_FTYPE_V8HI:
36594 case V4DI_FTYPE_V4SI:
36595 case V4DI_FTYPE_V2DI:
36596 case HI_FTYPE_HI:
36597 case HI_FTYPE_V16QI:
36598 case SI_FTYPE_V32QI:
36599 case DI_FTYPE_V64QI:
36600 case V16QI_FTYPE_HI:
36601 case V32QI_FTYPE_SI:
36602 case V64QI_FTYPE_DI:
36603 case V8HI_FTYPE_QI:
36604 case V16HI_FTYPE_HI:
36605 case V32HI_FTYPE_SI:
36606 case V4SI_FTYPE_QI:
36607 case V8SI_FTYPE_QI:
36608 case V4SI_FTYPE_HI:
36609 case V8SI_FTYPE_HI:
36610 case QI_FTYPE_V8HI:
36611 case HI_FTYPE_V16HI:
36612 case SI_FTYPE_V32HI:
36613 case QI_FTYPE_V4SI:
36614 case QI_FTYPE_V8SI:
36615 case HI_FTYPE_V16SI:
36616 case QI_FTYPE_V2DI:
36617 case QI_FTYPE_V4DI:
36618 case QI_FTYPE_V8DI:
36619 case UINT_FTYPE_V2DF:
36620 case UINT_FTYPE_V4SF:
36621 case UINT64_FTYPE_V2DF:
36622 case UINT64_FTYPE_V4SF:
36623 case V16QI_FTYPE_V8DI:
36624 case V16HI_FTYPE_V16SI:
36625 case V16SI_FTYPE_HI:
36626 case V2DI_FTYPE_QI:
36627 case V4DI_FTYPE_QI:
36628 case V16SI_FTYPE_V16SI:
36629 case V16SI_FTYPE_INT:
36630 case V16SF_FTYPE_FLOAT:
36631 case V16SF_FTYPE_V8SF:
36632 case V16SI_FTYPE_V8SI:
36633 case V16SF_FTYPE_V4SF:
36634 case V16SI_FTYPE_V4SI:
36635 case V16SF_FTYPE_V16SF:
36636 case V8HI_FTYPE_V8DI:
36637 case V8UHI_FTYPE_V8UHI:
36638 case V8SI_FTYPE_V8DI:
36639 case V8SF_FTYPE_V8DF:
36640 case V8DI_FTYPE_QI:
36641 case V8DI_FTYPE_INT64:
36642 case V8DI_FTYPE_V4DI:
36643 case V8DI_FTYPE_V8DI:
36644 case V8DF_FTYPE_DOUBLE:
36645 case V8DF_FTYPE_V4DF:
36646 case V8DF_FTYPE_V2DF:
36647 case V8DF_FTYPE_V8DF:
36648 case V8DF_FTYPE_V8SI:
36649 nargs = 1;
36650 break;
36651 case V4SF_FTYPE_V4SF_VEC_MERGE:
36652 case V2DF_FTYPE_V2DF_VEC_MERGE:
36653 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36654 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36655 case V16QI_FTYPE_V16QI_V16QI:
36656 case V16QI_FTYPE_V8HI_V8HI:
36657 case V16SI_FTYPE_V16SI_V16SI:
36658 case V16SF_FTYPE_V16SF_V16SF:
36659 case V16SF_FTYPE_V16SF_V16SI:
36660 case V8QI_FTYPE_V8QI_V8QI:
36661 case V8QI_FTYPE_V4HI_V4HI:
36662 case V8HI_FTYPE_V8HI_V8HI:
36663 case V8HI_FTYPE_V16QI_V16QI:
36664 case V8HI_FTYPE_V4SI_V4SI:
36665 case V8SF_FTYPE_V8SF_V8SF:
36666 case V8SF_FTYPE_V8SF_V8SI:
36667 case V8DI_FTYPE_V8DI_V8DI:
36668 case V8DF_FTYPE_V8DF_V8DF:
36669 case V8DF_FTYPE_V8DF_V8DI:
36670 case V4SI_FTYPE_V4SI_V4SI:
36671 case V4SI_FTYPE_V8HI_V8HI:
36672 case V4SI_FTYPE_V4SF_V4SF:
36673 case V4SI_FTYPE_V2DF_V2DF:
36674 case V4HI_FTYPE_V4HI_V4HI:
36675 case V4HI_FTYPE_V8QI_V8QI:
36676 case V4HI_FTYPE_V2SI_V2SI:
36677 case V4DF_FTYPE_V4DF_V4DF:
36678 case V4DF_FTYPE_V4DF_V4DI:
36679 case V4SF_FTYPE_V4SF_V4SF:
36680 case V4SF_FTYPE_V4SF_V4SI:
36681 case V4SF_FTYPE_V4SF_V2SI:
36682 case V4SF_FTYPE_V4SF_V2DF:
36683 case V4SF_FTYPE_V4SF_UINT:
36684 case V4SF_FTYPE_V4SF_UINT64:
36685 case V4SF_FTYPE_V4SF_DI:
36686 case V4SF_FTYPE_V4SF_SI:
36687 case V2DI_FTYPE_V2DI_V2DI:
36688 case V2DI_FTYPE_V16QI_V16QI:
36689 case V2DI_FTYPE_V4SI_V4SI:
36690 case V2UDI_FTYPE_V4USI_V4USI:
36691 case V2DI_FTYPE_V2DI_V16QI:
36692 case V2DI_FTYPE_V2DF_V2DF:
36693 case V2SI_FTYPE_V2SI_V2SI:
36694 case V2SI_FTYPE_V4HI_V4HI:
36695 case V2SI_FTYPE_V2SF_V2SF:
36696 case V2DF_FTYPE_V2DF_V2DF:
36697 case V2DF_FTYPE_V2DF_V4SF:
36698 case V2DF_FTYPE_V2DF_V2DI:
36699 case V2DF_FTYPE_V2DF_DI:
36700 case V2DF_FTYPE_V2DF_SI:
36701 case V2DF_FTYPE_V2DF_UINT:
36702 case V2DF_FTYPE_V2DF_UINT64:
36703 case V2SF_FTYPE_V2SF_V2SF:
36704 case V1DI_FTYPE_V1DI_V1DI:
36705 case V1DI_FTYPE_V8QI_V8QI:
36706 case V1DI_FTYPE_V2SI_V2SI:
36707 case V32QI_FTYPE_V16HI_V16HI:
36708 case V16HI_FTYPE_V8SI_V8SI:
36709 case V32QI_FTYPE_V32QI_V32QI:
36710 case V16HI_FTYPE_V32QI_V32QI:
36711 case V16HI_FTYPE_V16HI_V16HI:
36712 case V8SI_FTYPE_V4DF_V4DF:
36713 case V8SI_FTYPE_V8SI_V8SI:
36714 case V8SI_FTYPE_V16HI_V16HI:
36715 case V4DI_FTYPE_V4DI_V4DI:
36716 case V4DI_FTYPE_V8SI_V8SI:
36717 case V4UDI_FTYPE_V8USI_V8USI:
36718 case QI_FTYPE_V8DI_V8DI:
36719 case V8DI_FTYPE_V64QI_V64QI:
36720 case HI_FTYPE_V16SI_V16SI:
36721 if (comparison == UNKNOWN)
36722 return ix86_expand_binop_builtin (icode, exp, target);
36723 nargs = 2;
36724 break;
36725 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36726 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36727 gcc_assert (comparison != UNKNOWN);
36728 nargs = 2;
36729 swap = true;
36730 break;
36731 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36732 case V16HI_FTYPE_V16HI_SI_COUNT:
36733 case V8SI_FTYPE_V8SI_V4SI_COUNT:
36734 case V8SI_FTYPE_V8SI_SI_COUNT:
36735 case V4DI_FTYPE_V4DI_V2DI_COUNT:
36736 case V4DI_FTYPE_V4DI_INT_COUNT:
36737 case V8HI_FTYPE_V8HI_V8HI_COUNT:
36738 case V8HI_FTYPE_V8HI_SI_COUNT:
36739 case V4SI_FTYPE_V4SI_V4SI_COUNT:
36740 case V4SI_FTYPE_V4SI_SI_COUNT:
36741 case V4HI_FTYPE_V4HI_V4HI_COUNT:
36742 case V4HI_FTYPE_V4HI_SI_COUNT:
36743 case V2DI_FTYPE_V2DI_V2DI_COUNT:
36744 case V2DI_FTYPE_V2DI_SI_COUNT:
36745 case V2SI_FTYPE_V2SI_V2SI_COUNT:
36746 case V2SI_FTYPE_V2SI_SI_COUNT:
36747 case V1DI_FTYPE_V1DI_V1DI_COUNT:
36748 case V1DI_FTYPE_V1DI_SI_COUNT:
36749 nargs = 2;
36750 last_arg_count = true;
36751 break;
36752 case UINT64_FTYPE_UINT64_UINT64:
36753 case UINT_FTYPE_UINT_UINT:
36754 case UINT_FTYPE_UINT_USHORT:
36755 case UINT_FTYPE_UINT_UCHAR:
36756 case UINT16_FTYPE_UINT16_INT:
36757 case UINT8_FTYPE_UINT8_INT:
36758 case HI_FTYPE_HI_HI:
36759 case SI_FTYPE_SI_SI:
36760 case DI_FTYPE_DI_DI:
36761 case V16SI_FTYPE_V8DF_V8DF:
36762 nargs = 2;
36763 break;
36764 case V2DI_FTYPE_V2DI_INT_CONVERT:
36765 nargs = 2;
36766 rmode = V1TImode;
36767 nargs_constant = 1;
36768 break;
36769 case V4DI_FTYPE_V4DI_INT_CONVERT:
36770 nargs = 2;
36771 rmode = V2TImode;
36772 nargs_constant = 1;
36773 break;
36774 case V8DI_FTYPE_V8DI_INT_CONVERT:
36775 nargs = 2;
36776 rmode = V4TImode;
36777 nargs_constant = 1;
36778 break;
36779 case V8HI_FTYPE_V8HI_INT:
36780 case V8HI_FTYPE_V8SF_INT:
36781 case V16HI_FTYPE_V16SF_INT:
36782 case V8HI_FTYPE_V4SF_INT:
36783 case V8SF_FTYPE_V8SF_INT:
36784 case V4SF_FTYPE_V16SF_INT:
36785 case V16SF_FTYPE_V16SF_INT:
36786 case V4SI_FTYPE_V4SI_INT:
36787 case V4SI_FTYPE_V8SI_INT:
36788 case V4HI_FTYPE_V4HI_INT:
36789 case V4DF_FTYPE_V4DF_INT:
36790 case V4DF_FTYPE_V8DF_INT:
36791 case V4SF_FTYPE_V4SF_INT:
36792 case V4SF_FTYPE_V8SF_INT:
36793 case V2DI_FTYPE_V2DI_INT:
36794 case V2DF_FTYPE_V2DF_INT:
36795 case V2DF_FTYPE_V4DF_INT:
36796 case V16HI_FTYPE_V16HI_INT:
36797 case V8SI_FTYPE_V8SI_INT:
36798 case V16SI_FTYPE_V16SI_INT:
36799 case V4SI_FTYPE_V16SI_INT:
36800 case V4DI_FTYPE_V4DI_INT:
36801 case V2DI_FTYPE_V4DI_INT:
36802 case V4DI_FTYPE_V8DI_INT:
36803 case HI_FTYPE_HI_INT:
36804 case QI_FTYPE_V4SF_INT:
36805 case QI_FTYPE_V2DF_INT:
36806 nargs = 2;
36807 nargs_constant = 1;
36808 break;
36809 case V16QI_FTYPE_V16QI_V16QI_V16QI:
36810 case V8SF_FTYPE_V8SF_V8SF_V8SF:
36811 case V4DF_FTYPE_V4DF_V4DF_V4DF:
36812 case V4SF_FTYPE_V4SF_V4SF_V4SF:
36813 case V2DF_FTYPE_V2DF_V2DF_V2DF:
36814 case V32QI_FTYPE_V32QI_V32QI_V32QI:
36815 case HI_FTYPE_V16SI_V16SI_HI:
36816 case QI_FTYPE_V8DI_V8DI_QI:
36817 case V16HI_FTYPE_V16SI_V16HI_HI:
36818 case V16QI_FTYPE_V16SI_V16QI_HI:
36819 case V16QI_FTYPE_V8DI_V16QI_QI:
36820 case V16SF_FTYPE_V16SF_V16SF_HI:
36821 case V16SF_FTYPE_V16SF_V16SF_V16SF:
36822 case V16SF_FTYPE_V16SF_V16SI_V16SF:
36823 case V16SF_FTYPE_V16SI_V16SF_HI:
36824 case V16SF_FTYPE_V16SI_V16SF_V16SF:
36825 case V16SF_FTYPE_V4SF_V16SF_HI:
36826 case V16SI_FTYPE_SI_V16SI_HI:
36827 case V16SI_FTYPE_V16HI_V16SI_HI:
36828 case V16SI_FTYPE_V16QI_V16SI_HI:
36829 case V16SI_FTYPE_V16SF_V16SI_HI:
36830 case V8SF_FTYPE_V4SF_V8SF_QI:
36831 case V4DF_FTYPE_V2DF_V4DF_QI:
36832 case V8SI_FTYPE_V4SI_V8SI_QI:
36833 case V8SI_FTYPE_SI_V8SI_QI:
36834 case V4SI_FTYPE_V4SI_V4SI_QI:
36835 case V4SI_FTYPE_SI_V4SI_QI:
36836 case V4DI_FTYPE_V2DI_V4DI_QI:
36837 case V4DI_FTYPE_DI_V4DI_QI:
36838 case V2DI_FTYPE_V2DI_V2DI_QI:
36839 case V2DI_FTYPE_DI_V2DI_QI:
36840 case V64QI_FTYPE_V64QI_V64QI_DI:
36841 case V64QI_FTYPE_V16QI_V64QI_DI:
36842 case V64QI_FTYPE_QI_V64QI_DI:
36843 case V32QI_FTYPE_V32QI_V32QI_SI:
36844 case V32QI_FTYPE_V16QI_V32QI_SI:
36845 case V32QI_FTYPE_QI_V32QI_SI:
36846 case V16QI_FTYPE_V16QI_V16QI_HI:
36847 case V16QI_FTYPE_QI_V16QI_HI:
36848 case V32HI_FTYPE_V8HI_V32HI_SI:
36849 case V32HI_FTYPE_HI_V32HI_SI:
36850 case V16HI_FTYPE_V8HI_V16HI_HI:
36851 case V16HI_FTYPE_HI_V16HI_HI:
36852 case V8HI_FTYPE_V8HI_V8HI_QI:
36853 case V8HI_FTYPE_HI_V8HI_QI:
36854 case V8SF_FTYPE_V8HI_V8SF_QI:
36855 case V4SF_FTYPE_V8HI_V4SF_QI:
36856 case V8SI_FTYPE_V8SF_V8SI_QI:
36857 case V4SI_FTYPE_V4SF_V4SI_QI:
36858 case V8DI_FTYPE_V8SF_V8DI_QI:
36859 case V4DI_FTYPE_V4SF_V4DI_QI:
36860 case V2DI_FTYPE_V4SF_V2DI_QI:
36861 case V8SF_FTYPE_V8DI_V8SF_QI:
36862 case V4SF_FTYPE_V4DI_V4SF_QI:
36863 case V4SF_FTYPE_V2DI_V4SF_QI:
36864 case V8DF_FTYPE_V8DI_V8DF_QI:
36865 case V4DF_FTYPE_V4DI_V4DF_QI:
36866 case V2DF_FTYPE_V2DI_V2DF_QI:
36867 case V16QI_FTYPE_V8HI_V16QI_QI:
36868 case V16QI_FTYPE_V16HI_V16QI_HI:
36869 case V16QI_FTYPE_V4SI_V16QI_QI:
36870 case V16QI_FTYPE_V8SI_V16QI_QI:
36871 case V8HI_FTYPE_V4SI_V8HI_QI:
36872 case V8HI_FTYPE_V8SI_V8HI_QI:
36873 case V16QI_FTYPE_V2DI_V16QI_QI:
36874 case V16QI_FTYPE_V4DI_V16QI_QI:
36875 case V8HI_FTYPE_V2DI_V8HI_QI:
36876 case V8HI_FTYPE_V4DI_V8HI_QI:
36877 case V4SI_FTYPE_V2DI_V4SI_QI:
36878 case V4SI_FTYPE_V4DI_V4SI_QI:
36879 case V32QI_FTYPE_V32HI_V32QI_SI:
36880 case HI_FTYPE_V16QI_V16QI_HI:
36881 case SI_FTYPE_V32QI_V32QI_SI:
36882 case DI_FTYPE_V64QI_V64QI_DI:
36883 case QI_FTYPE_V8HI_V8HI_QI:
36884 case HI_FTYPE_V16HI_V16HI_HI:
36885 case SI_FTYPE_V32HI_V32HI_SI:
36886 case QI_FTYPE_V4SI_V4SI_QI:
36887 case QI_FTYPE_V8SI_V8SI_QI:
36888 case QI_FTYPE_V2DI_V2DI_QI:
36889 case QI_FTYPE_V4DI_V4DI_QI:
36890 case V4SF_FTYPE_V2DF_V4SF_QI:
36891 case V4SF_FTYPE_V4DF_V4SF_QI:
36892 case V16SI_FTYPE_V16SI_V16SI_HI:
36893 case V16SI_FTYPE_V16SI_V16SI_V16SI:
36894 case V16SI_FTYPE_V4SI_V16SI_HI:
36895 case V2DI_FTYPE_V2DI_V2DI_V2DI:
36896 case V2DI_FTYPE_V4SI_V2DI_QI:
36897 case V2DI_FTYPE_V8HI_V2DI_QI:
36898 case V2DI_FTYPE_V16QI_V2DI_QI:
36899 case V4DI_FTYPE_V4DI_V4DI_QI:
36900 case V4DI_FTYPE_V4SI_V4DI_QI:
36901 case V4DI_FTYPE_V8HI_V4DI_QI:
36902 case V4DI_FTYPE_V16QI_V4DI_QI:
36903 case V8DI_FTYPE_V8DF_V8DI_QI:
36904 case V4DI_FTYPE_V4DF_V4DI_QI:
36905 case V2DI_FTYPE_V2DF_V2DI_QI:
36906 case V4SI_FTYPE_V4DF_V4SI_QI:
36907 case V4SI_FTYPE_V2DF_V4SI_QI:
36908 case V4SI_FTYPE_V8HI_V4SI_QI:
36909 case V4SI_FTYPE_V16QI_V4SI_QI:
36910 case V8SI_FTYPE_V8SI_V8SI_V8SI:
36911 case V4DI_FTYPE_V4DI_V4DI_V4DI:
36912 case V8DF_FTYPE_V2DF_V8DF_QI:
36913 case V8DF_FTYPE_V4DF_V8DF_QI:
36914 case V8DF_FTYPE_V8DF_V8DF_QI:
36915 case V8DF_FTYPE_V8DF_V8DF_V8DF:
36916 case V8SF_FTYPE_V8SF_V8SF_QI:
36917 case V8SF_FTYPE_V8SI_V8SF_QI:
36918 case V4DF_FTYPE_V4DF_V4DF_QI:
36919 case V4SF_FTYPE_V4SF_V4SF_QI:
36920 case V2DF_FTYPE_V2DF_V2DF_QI:
36921 case V2DF_FTYPE_V4SF_V2DF_QI:
36922 case V2DF_FTYPE_V4SI_V2DF_QI:
36923 case V4SF_FTYPE_V4SI_V4SF_QI:
36924 case V4DF_FTYPE_V4SF_V4DF_QI:
36925 case V4DF_FTYPE_V4SI_V4DF_QI:
36926 case V8SI_FTYPE_V8SI_V8SI_QI:
36927 case V8SI_FTYPE_V8HI_V8SI_QI:
36928 case V8SI_FTYPE_V16QI_V8SI_QI:
36929 case V8DF_FTYPE_V8DF_V8DI_V8DF:
36930 case V8DF_FTYPE_V8DI_V8DF_V8DF:
36931 case V8DF_FTYPE_V8SF_V8DF_QI:
36932 case V8DF_FTYPE_V8SI_V8DF_QI:
36933 case V8DI_FTYPE_DI_V8DI_QI:
36934 case V16SF_FTYPE_V8SF_V16SF_HI:
36935 case V16SI_FTYPE_V8SI_V16SI_HI:
36936 case V16HI_FTYPE_V16HI_V16HI_HI:
36937 case V8HI_FTYPE_V16QI_V8HI_QI:
36938 case V16HI_FTYPE_V16QI_V16HI_HI:
36939 case V32HI_FTYPE_V32HI_V32HI_SI:
36940 case V32HI_FTYPE_V32QI_V32HI_SI:
36941 case V8DI_FTYPE_V16QI_V8DI_QI:
36942 case V8DI_FTYPE_V2DI_V8DI_QI:
36943 case V8DI_FTYPE_V4DI_V8DI_QI:
36944 case V8DI_FTYPE_V8DI_V8DI_QI:
36945 case V8DI_FTYPE_V8DI_V8DI_V8DI:
36946 case V8DI_FTYPE_V8HI_V8DI_QI:
36947 case V8DI_FTYPE_V8SI_V8DI_QI:
36948 case V8HI_FTYPE_V8DI_V8HI_QI:
36949 case V8SF_FTYPE_V8DF_V8SF_QI:
36950 case V8SI_FTYPE_V8DF_V8SI_QI:
36951 case V8SI_FTYPE_V8DI_V8SI_QI:
36952 case V4SI_FTYPE_V4SI_V4SI_V4SI:
36953 nargs = 3;
36954 break;
36955 case V32QI_FTYPE_V32QI_V32QI_INT:
36956 case V16HI_FTYPE_V16HI_V16HI_INT:
36957 case V16QI_FTYPE_V16QI_V16QI_INT:
36958 case V4DI_FTYPE_V4DI_V4DI_INT:
36959 case V8HI_FTYPE_V8HI_V8HI_INT:
36960 case V8SI_FTYPE_V8SI_V8SI_INT:
36961 case V8SI_FTYPE_V8SI_V4SI_INT:
36962 case V8SF_FTYPE_V8SF_V8SF_INT:
36963 case V8SF_FTYPE_V8SF_V4SF_INT:
36964 case V4SI_FTYPE_V4SI_V4SI_INT:
36965 case V4DF_FTYPE_V4DF_V4DF_INT:
36966 case V16SF_FTYPE_V16SF_V16SF_INT:
36967 case V16SF_FTYPE_V16SF_V4SF_INT:
36968 case V16SI_FTYPE_V16SI_V4SI_INT:
36969 case V4DF_FTYPE_V4DF_V2DF_INT:
36970 case V4SF_FTYPE_V4SF_V4SF_INT:
36971 case V2DI_FTYPE_V2DI_V2DI_INT:
36972 case V4DI_FTYPE_V4DI_V2DI_INT:
36973 case V2DF_FTYPE_V2DF_V2DF_INT:
36974 case QI_FTYPE_V8DI_V8DI_INT:
36975 case QI_FTYPE_V8DF_V8DF_INT:
36976 case QI_FTYPE_V2DF_V2DF_INT:
36977 case QI_FTYPE_V4SF_V4SF_INT:
36978 case HI_FTYPE_V16SI_V16SI_INT:
36979 case HI_FTYPE_V16SF_V16SF_INT:
36980 nargs = 3;
36981 nargs_constant = 1;
36982 break;
36983 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
36984 nargs = 3;
36985 rmode = V4DImode;
36986 nargs_constant = 1;
36987 break;
36988 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
36989 nargs = 3;
36990 rmode = V2DImode;
36991 nargs_constant = 1;
36992 break;
36993 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
36994 nargs = 3;
36995 rmode = DImode;
36996 nargs_constant = 1;
36997 break;
36998 case V2DI_FTYPE_V2DI_UINT_UINT:
36999 nargs = 3;
37000 nargs_constant = 2;
37001 break;
37002 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
37003 nargs = 3;
37004 rmode = V8DImode;
37005 nargs_constant = 1;
37006 break;
37007 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
37008 nargs = 5;
37009 rmode = V8DImode;
37010 mask_pos = 2;
37011 nargs_constant = 1;
37012 break;
37013 case QI_FTYPE_V8DF_INT_QI:
37014 case QI_FTYPE_V4DF_INT_QI:
37015 case QI_FTYPE_V2DF_INT_QI:
37016 case HI_FTYPE_V16SF_INT_HI:
37017 case QI_FTYPE_V8SF_INT_QI:
37018 case QI_FTYPE_V4SF_INT_QI:
37019 nargs = 3;
37020 mask_pos = 1;
37021 nargs_constant = 1;
37022 break;
37023 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
37024 nargs = 5;
37025 rmode = V4DImode;
37026 mask_pos = 2;
37027 nargs_constant = 1;
37028 break;
37029 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
37030 nargs = 5;
37031 rmode = V2DImode;
37032 mask_pos = 2;
37033 nargs_constant = 1;
37034 break;
37035 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
37036 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
37037 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
37038 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
37039 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
37040 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
37041 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
37042 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
37043 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
37044 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
37045 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
37046 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
37047 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
37048 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
37049 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
37050 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
37051 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
37052 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
37053 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
37054 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
37055 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
37056 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
37057 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
37058 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
37059 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
37060 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
37061 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
37062 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
37063 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
37064 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
37065 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
37066 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
37067 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
37068 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
37069 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
37070 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
37071 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
37072 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
37073 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
37074 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
37075 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
37076 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
37077 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
37078 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
37079 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
37080 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
37081 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
37082 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
37083 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
37084 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
37085 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
37086 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
37087 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
37088 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
37089 nargs = 4;
37090 break;
37091 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37092 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37093 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37094 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37095 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37096 nargs = 4;
37097 nargs_constant = 1;
37098 break;
37099 case QI_FTYPE_V4DI_V4DI_INT_QI:
37100 case QI_FTYPE_V8SI_V8SI_INT_QI:
37101 case QI_FTYPE_V4DF_V4DF_INT_QI:
37102 case QI_FTYPE_V8SF_V8SF_INT_QI:
37103 case QI_FTYPE_V2DI_V2DI_INT_QI:
37104 case QI_FTYPE_V4SI_V4SI_INT_QI:
37105 case QI_FTYPE_V2DF_V2DF_INT_QI:
37106 case QI_FTYPE_V4SF_V4SF_INT_QI:
37107 case DI_FTYPE_V64QI_V64QI_INT_DI:
37108 case SI_FTYPE_V32QI_V32QI_INT_SI:
37109 case HI_FTYPE_V16QI_V16QI_INT_HI:
37110 case SI_FTYPE_V32HI_V32HI_INT_SI:
37111 case HI_FTYPE_V16HI_V16HI_INT_HI:
37112 case QI_FTYPE_V8HI_V8HI_INT_QI:
37113 nargs = 4;
37114 mask_pos = 1;
37115 nargs_constant = 1;
37116 break;
37117 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37118 nargs = 4;
37119 nargs_constant = 2;
37120 break;
37121 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37122 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37123 nargs = 4;
37124 break;
37125 case QI_FTYPE_V8DI_V8DI_INT_QI:
37126 case HI_FTYPE_V16SI_V16SI_INT_HI:
37127 case QI_FTYPE_V8DF_V8DF_INT_QI:
37128 case HI_FTYPE_V16SF_V16SF_INT_HI:
37129 mask_pos = 1;
37130 nargs = 4;
37131 nargs_constant = 1;
37132 break;
37133 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37134 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37135 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37136 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37137 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37138 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37139 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37140 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37141 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37142 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37143 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37144 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37145 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37146 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37147 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37148 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37149 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37150 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37151 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37152 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37153 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37154 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37155 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37156 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37157 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37158 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37159 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37160 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37161 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37162 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37163 nargs = 4;
37164 mask_pos = 2;
37165 nargs_constant = 1;
37166 break;
37167 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37168 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37169 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37170 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37171 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37172 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37173 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37174 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37175 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37176 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37177 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37178 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37179 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37180 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37181 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37182 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37183 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37184 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37185 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37186 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37187 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37188 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37189 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37190 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37191 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37192 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37193 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37194 nargs = 5;
37195 mask_pos = 2;
37196 nargs_constant = 1;
37197 break;
37198 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37199 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37200 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37201 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37202 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37203 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37204 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37205 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37206 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37207 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37208 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37209 nargs = 5;
37211 mask_pos = 1;
37212 nargs_constant = 1;
37213 break;
37215 default:
37216 gcc_unreachable ();
37219 gcc_assert (nargs <= ARRAY_SIZE (args));
37221 if (comparison != UNKNOWN)
37223 gcc_assert (nargs == 2);
37224 return ix86_expand_sse_compare (d, exp, target, swap);
37227 if (rmode == VOIDmode || rmode == tmode)
37229 if (optimize
37230 || target == 0
37231 || GET_MODE (target) != tmode
37232 || !insn_p->operand[0].predicate (target, tmode))
37233 target = gen_reg_rtx (tmode);
37234 real_target = target;
37236 else
37238 real_target = gen_reg_rtx (tmode);
37239 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
37242 for (i = 0; i < nargs; i++)
37244 tree arg = CALL_EXPR_ARG (exp, i);
37245 rtx op = expand_normal (arg);
37246 machine_mode mode = insn_p->operand[i + 1].mode;
37247 bool match = insn_p->operand[i + 1].predicate (op, mode);
37249 if (last_arg_count && (i + 1) == nargs)
37251 /* SIMD shift insns take either an 8-bit immediate or a
37252 register as the count, but the builtin functions take an int as
37253 the count. If the count operand doesn't match, put it in a register. */
37254 if (!match)
37256 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37257 if (!insn_p->operand[i + 1].predicate (op, mode))
37258 op = copy_to_reg (op);
37261 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37262 (!mask_pos && (nargs - i) <= nargs_constant))
37264 if (!match)
37265 switch (icode)
37267 case CODE_FOR_avx_vinsertf128v4di:
37268 case CODE_FOR_avx_vextractf128v4di:
37269 error ("the last argument must be a 1-bit immediate");
37270 return const0_rtx;
37272 case CODE_FOR_avx512f_cmpv8di3_mask:
37273 case CODE_FOR_avx512f_cmpv16si3_mask:
37274 case CODE_FOR_avx512f_ucmpv8di3_mask:
37275 case CODE_FOR_avx512f_ucmpv16si3_mask:
37276 case CODE_FOR_avx512vl_cmpv4di3_mask:
37277 case CODE_FOR_avx512vl_cmpv8si3_mask:
37278 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37279 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37280 case CODE_FOR_avx512vl_cmpv2di3_mask:
37281 case CODE_FOR_avx512vl_cmpv4si3_mask:
37282 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37283 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37284 error ("the last argument must be a 3-bit immediate");
37285 return const0_rtx;
37287 case CODE_FOR_sse4_1_roundsd:
37288 case CODE_FOR_sse4_1_roundss:
37290 case CODE_FOR_sse4_1_roundpd:
37291 case CODE_FOR_sse4_1_roundps:
37292 case CODE_FOR_avx_roundpd256:
37293 case CODE_FOR_avx_roundps256:
37295 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37296 case CODE_FOR_sse4_1_roundps_sfix:
37297 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37298 case CODE_FOR_avx_roundps_sfix256:
37300 case CODE_FOR_sse4_1_blendps:
37301 case CODE_FOR_avx_blendpd256:
37302 case CODE_FOR_avx_vpermilv4df:
37303 case CODE_FOR_avx_vpermilv4df_mask:
37304 case CODE_FOR_avx512f_getmantv8df_mask:
37305 case CODE_FOR_avx512f_getmantv16sf_mask:
37306 case CODE_FOR_avx512vl_getmantv8sf_mask:
37307 case CODE_FOR_avx512vl_getmantv4df_mask:
37308 case CODE_FOR_avx512vl_getmantv4sf_mask:
37309 case CODE_FOR_avx512vl_getmantv2df_mask:
37310 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37311 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37312 case CODE_FOR_avx512dq_rangepv4df_mask:
37313 case CODE_FOR_avx512dq_rangepv8sf_mask:
37314 case CODE_FOR_avx512dq_rangepv2df_mask:
37315 case CODE_FOR_avx512dq_rangepv4sf_mask:
37316 case CODE_FOR_avx_shufpd256_mask:
37317 error ("the last argument must be a 4-bit immediate");
37318 return const0_rtx;
37320 case CODE_FOR_sha1rnds4:
37321 case CODE_FOR_sse4_1_blendpd:
37322 case CODE_FOR_avx_vpermilv2df:
37323 case CODE_FOR_avx_vpermilv2df_mask:
37324 case CODE_FOR_xop_vpermil2v2df3:
37325 case CODE_FOR_xop_vpermil2v4sf3:
37326 case CODE_FOR_xop_vpermil2v4df3:
37327 case CODE_FOR_xop_vpermil2v8sf3:
37328 case CODE_FOR_avx512f_vinsertf32x4_mask:
37329 case CODE_FOR_avx512f_vinserti32x4_mask:
37330 case CODE_FOR_avx512f_vextractf32x4_mask:
37331 case CODE_FOR_avx512f_vextracti32x4_mask:
37332 case CODE_FOR_sse2_shufpd:
37333 case CODE_FOR_sse2_shufpd_mask:
37334 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37335 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37336 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37337 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37338 error ("the last argument must be a 2-bit immediate");
37339 return const0_rtx;
37341 case CODE_FOR_avx_vextractf128v4df:
37342 case CODE_FOR_avx_vextractf128v8sf:
37343 case CODE_FOR_avx_vextractf128v8si:
37344 case CODE_FOR_avx_vinsertf128v4df:
37345 case CODE_FOR_avx_vinsertf128v8sf:
37346 case CODE_FOR_avx_vinsertf128v8si:
37347 case CODE_FOR_avx512f_vinsertf64x4_mask:
37348 case CODE_FOR_avx512f_vinserti64x4_mask:
37349 case CODE_FOR_avx512f_vextractf64x4_mask:
37350 case CODE_FOR_avx512f_vextracti64x4_mask:
37351 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37352 case CODE_FOR_avx512dq_vinserti32x8_mask:
37353 case CODE_FOR_avx512vl_vinsertv4df:
37354 case CODE_FOR_avx512vl_vinsertv4di:
37355 case CODE_FOR_avx512vl_vinsertv8sf:
37356 case CODE_FOR_avx512vl_vinsertv8si:
37357 error ("the last argument must be a 1-bit immediate");
37358 return const0_rtx;
37360 case CODE_FOR_avx_vmcmpv2df3:
37361 case CODE_FOR_avx_vmcmpv4sf3:
37362 case CODE_FOR_avx_cmpv2df3:
37363 case CODE_FOR_avx_cmpv4sf3:
37364 case CODE_FOR_avx_cmpv4df3:
37365 case CODE_FOR_avx_cmpv8sf3:
37366 case CODE_FOR_avx512f_cmpv8df3_mask:
37367 case CODE_FOR_avx512f_cmpv16sf3_mask:
37368 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37369 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37370 error ("the last argument must be a 5-bit immediate");
37371 return const0_rtx;
37373 default:
37374 switch (nargs_constant)
37376 case 2:
37377 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37378 (!mask_pos && (nargs - i) == nargs_constant))
37380 error ("the next to last argument must be an 8-bit immediate");
37381 break;
37383 case 1:
37384 error ("the last argument must be an 8-bit immediate");
37385 break;
37386 default:
37387 gcc_unreachable ();
37389 return const0_rtx;
37392 else
37394 if (VECTOR_MODE_P (mode))
37395 op = safe_vector_operand (op, mode);
37397 /* If we aren't optimizing, only allow one memory operand to
37398 be generated. */
37399 if (memory_operand (op, mode))
37400 num_memory++;
37402 op = fixup_modeless_constant (op, mode);
37404 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37406 if (optimize || !match || num_memory > 1)
37407 op = copy_to_mode_reg (mode, op);
37409 else
37411 op = copy_to_reg (op);
37412 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37416 args[i].op = op;
37417 args[i].mode = mode;
37420 switch (nargs)
37422 case 1:
37423 pat = GEN_FCN (icode) (real_target, args[0].op);
37424 break;
37425 case 2:
37426 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37427 break;
37428 case 3:
37429 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37430 args[2].op);
37431 break;
37432 case 4:
37433 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37434 args[2].op, args[3].op);
37435 break;
37436 case 5:
37437 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37438 args[2].op, args[3].op, args[4].op);
break;
37439 case 6:
37440 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37441 args[2].op, args[3].op, args[4].op,
37442 args[5].op);
37443 break;
37444 default:
37445 gcc_unreachable ();
37448 if (! pat)
37449 return 0;
37451 emit_insn (pat);
37452 return target;
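/* Illustration (not part of the original sources): two builtins that take
   paths handled above -- a shift whose count may end up in a register
   (the last_arg_count case) and a blend whose selector must be a 4-bit
   immediate (the nargs_constant case).  Sketch, assuming <immintrin.h>:

     #include <immintrin.h>

     __m128i shift_each (__m128i v, int n)
     {
       return _mm_slli_epi32 (v, n);      // count copied to a register if needed
     }

     __m128 blend_half (__m128 a, __m128 b)
     {
       return _mm_blend_ps (a, b, 0x5);   // selector must be a 4-bit immediate
     }
*/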
37455 /* Transform a pattern of the following layout:
37456 (parallel [
37457 set (A B)
37458 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37460 into:
37461 (set (A B))
and likewise transform:
37464 (parallel [ A B
...
37466 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
...
])
37469 into:
37470 (parallel [ A B ... ]) */
37472 static rtx
37473 ix86_erase_embedded_rounding (rtx pat)
37475 if (GET_CODE (pat) == INSN)
37476 pat = PATTERN (pat);
37478 gcc_assert (GET_CODE (pat) == PARALLEL);
37480 if (XVECLEN (pat, 0) == 2)
37482 rtx p0 = XVECEXP (pat, 0, 0);
37483 rtx p1 = XVECEXP (pat, 0, 1);
37485 gcc_assert (GET_CODE (p0) == SET
37486 && GET_CODE (p1) == UNSPEC
37487 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37489 return p0;
37491 else
37493 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37494 int i = 0;
37495 int j = 0;
37497 for (; i < XVECLEN (pat, 0); ++i)
37499 rtx elem = XVECEXP (pat, 0, i);
37500 if (GET_CODE (elem) != UNSPEC
37501 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37502 res [j++] = elem;
37505 /* No more than 1 occurrence was removed. */
37506 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37508 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
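/* Illustration (not part of the original sources): the rounding immediate
   _MM_FROUND_CUR_DIRECTION selects NO_ROUND, in which case the callers
   below use this helper to strip the UNSPEC_EMBEDDED_ROUNDING parallel and
   emit the plain operation.  Sketch, assuming <immintrin.h>:

     #include <immintrin.h>

     __m512d plain_add (__m512d a, __m512d b)
     {
       // No rounding override is encoded; the embedded-rounding wrapper is erased.
       return _mm512_add_round_pd (a, b, _MM_FROUND_CUR_DIRECTION);
     }
*/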
37512 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37513 with rounding. */
37514 static rtx
37515 ix86_expand_sse_comi_round (const struct builtin_description *d,
37516 tree exp, rtx target)
37518 rtx pat, set_dst;
37519 tree arg0 = CALL_EXPR_ARG (exp, 0);
37520 tree arg1 = CALL_EXPR_ARG (exp, 1);
37521 tree arg2 = CALL_EXPR_ARG (exp, 2);
37522 tree arg3 = CALL_EXPR_ARG (exp, 3);
37523 rtx op0 = expand_normal (arg0);
37524 rtx op1 = expand_normal (arg1);
37525 rtx op2 = expand_normal (arg2);
37526 rtx op3 = expand_normal (arg3);
37527 enum insn_code icode = d->icode;
37528 const struct insn_data_d *insn_p = &insn_data[icode];
37529 machine_mode mode0 = insn_p->operand[0].mode;
37530 machine_mode mode1 = insn_p->operand[1].mode;
37531 enum rtx_code comparison = UNEQ;
37532 bool need_ucomi = false;
37534 /* See avxintrin.h for values. */
37535 enum rtx_code comi_comparisons[32] =
37537 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37538 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37539 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37541 bool need_ucomi_values[32] =
37543 true, false, false, true, true, false, false, true,
37544 true, false, false, true, true, false, false, true,
37545 false, true, true, false, false, true, true, false,
37546 false, true, true, false, false, true, true, false
37549 if (!CONST_INT_P (op2))
37551 error ("the third argument must be a comparison constant");
37552 return const0_rtx;
37554 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37556 error ("incorrect comparison mode");
37557 return const0_rtx;
37560 if (!insn_p->operand[2].predicate (op3, SImode))
37562 error ("incorrect rounding operand");
37563 return const0_rtx;
37566 comparison = comi_comparisons[INTVAL (op2)];
37567 need_ucomi = need_ucomi_values[INTVAL (op2)];
37569 if (VECTOR_MODE_P (mode0))
37570 op0 = safe_vector_operand (op0, mode0);
37571 if (VECTOR_MODE_P (mode1))
37572 op1 = safe_vector_operand (op1, mode1);
37574 target = gen_reg_rtx (SImode);
37575 emit_move_insn (target, const0_rtx);
37576 target = gen_rtx_SUBREG (QImode, target, 0);
37578 if ((optimize && !register_operand (op0, mode0))
37579 || !insn_p->operand[0].predicate (op0, mode0))
37580 op0 = copy_to_mode_reg (mode0, op0);
37581 if ((optimize && !register_operand (op1, mode1))
37582 || !insn_p->operand[1].predicate (op1, mode1))
37583 op1 = copy_to_mode_reg (mode1, op1);
37585 if (need_ucomi)
37586 icode = icode == CODE_FOR_sse_comi_round
37587 ? CODE_FOR_sse_ucomi_round
37588 : CODE_FOR_sse2_ucomi_round;
37590 pat = GEN_FCN (icode) (op0, op1, op3);
37591 if (! pat)
37592 return 0;
37594 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37595 if (INTVAL (op3) == NO_ROUND)
37597 pat = ix86_erase_embedded_rounding (pat);
37598 if (! pat)
37599 return 0;
37601 set_dst = SET_DEST (pat);
37603 else
37605 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37606 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37609 emit_insn (pat);
37610 emit_insn (gen_rtx_SET (VOIDmode,
37611 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37612 gen_rtx_fmt_ee (comparison, QImode,
37613 set_dst,
37614 const0_rtx)));
37616 return SUBREG_REG (target);
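/* Illustration (not part of the original sources): a comi-with-SAE
   comparison of the kind expanded above.  Sketch, assuming the
   _mm_comi_round_ss intrinsic and the _CMP_* / _MM_FROUND_* macros from
   <immintrin.h>; the predicate must be a constant in the range 0..31 and
   the last operand a valid SAE/rounding operand, as checked above:

     #include <immintrin.h>

     int ordered_lt (__m128 a, __m128 b)
     {
       return _mm_comi_round_ss (a, b, _CMP_LT_OS, _MM_FROUND_NO_EXC);
     }
*/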
37619 static rtx
37620 ix86_expand_round_builtin (const struct builtin_description *d,
37621 tree exp, rtx target)
37623 rtx pat;
37624 unsigned int i, nargs;
37625 struct
37627 rtx op;
37628 machine_mode mode;
37629 } args[6];
37630 enum insn_code icode = d->icode;
37631 const struct insn_data_d *insn_p = &insn_data[icode];
37632 machine_mode tmode = insn_p->operand[0].mode;
37633 unsigned int nargs_constant = 0;
37634 unsigned int redundant_embed_rnd = 0;
37636 switch ((enum ix86_builtin_func_type) d->flag)
37638 case UINT64_FTYPE_V2DF_INT:
37639 case UINT64_FTYPE_V4SF_INT:
37640 case UINT_FTYPE_V2DF_INT:
37641 case UINT_FTYPE_V4SF_INT:
37642 case INT64_FTYPE_V2DF_INT:
37643 case INT64_FTYPE_V4SF_INT:
37644 case INT_FTYPE_V2DF_INT:
37645 case INT_FTYPE_V4SF_INT:
37646 nargs = 2;
37647 break;
37648 case V4SF_FTYPE_V4SF_UINT_INT:
37649 case V4SF_FTYPE_V4SF_UINT64_INT:
37650 case V2DF_FTYPE_V2DF_UINT64_INT:
37651 case V4SF_FTYPE_V4SF_INT_INT:
37652 case V4SF_FTYPE_V4SF_INT64_INT:
37653 case V2DF_FTYPE_V2DF_INT64_INT:
37654 case V4SF_FTYPE_V4SF_V4SF_INT:
37655 case V2DF_FTYPE_V2DF_V2DF_INT:
37656 case V4SF_FTYPE_V4SF_V2DF_INT:
37657 case V2DF_FTYPE_V2DF_V4SF_INT:
37658 nargs = 3;
37659 break;
37660 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37661 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37662 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37663 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37664 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37665 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37666 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37667 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37668 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37669 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37670 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37671 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37672 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37673 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37674 nargs = 4;
37675 break;
37676 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37677 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37678 nargs_constant = 2;
37679 nargs = 4;
37680 break;
37681 case INT_FTYPE_V4SF_V4SF_INT_INT:
37682 case INT_FTYPE_V2DF_V2DF_INT_INT:
37683 return ix86_expand_sse_comi_round (d, exp, target);
37684 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37685 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37686 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37687 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37688 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37689 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37690 nargs = 5;
37691 break;
37692 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37693 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37694 nargs_constant = 4;
37695 nargs = 5;
37696 break;
37697 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37698 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37699 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37700 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37701 nargs_constant = 3;
37702 nargs = 5;
37703 break;
37704 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37705 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37706 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37707 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37708 nargs = 6;
37709 nargs_constant = 4;
37710 break;
37711 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37712 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37713 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37714 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37715 nargs = 6;
37716 nargs_constant = 3;
37717 break;
37718 default:
37719 gcc_unreachable ();
37721 gcc_assert (nargs <= ARRAY_SIZE (args));
37723 if (optimize
37724 || target == 0
37725 || GET_MODE (target) != tmode
37726 || !insn_p->operand[0].predicate (target, tmode))
37727 target = gen_reg_rtx (tmode);
37729 for (i = 0; i < nargs; i++)
37731 tree arg = CALL_EXPR_ARG (exp, i);
37732 rtx op = expand_normal (arg);
37733 machine_mode mode = insn_p->operand[i + 1].mode;
37734 bool match = insn_p->operand[i + 1].predicate (op, mode);
37736 if (i == nargs - nargs_constant)
37738 if (!match)
37740 switch (icode)
37742 case CODE_FOR_avx512f_getmantv8df_mask_round:
37743 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37744 case CODE_FOR_avx512f_vgetmantv2df_round:
37745 case CODE_FOR_avx512f_vgetmantv4sf_round:
37746 error ("the immediate argument must be a 4-bit immediate");
37747 return const0_rtx;
37748 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37749 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37750 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37751 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37752 error ("the immediate argument must be a 5-bit immediate");
37753 return const0_rtx;
37754 default:
37755 error ("the immediate argument must be an 8-bit immediate");
37756 return const0_rtx;
37760 else if (i == nargs-1)
37762 if (!insn_p->operand[nargs].predicate (op, SImode))
37764 error ("incorrect rounding operand");
37765 return const0_rtx;
37768 /* If there is no rounding, use the normal version of the pattern. */
37769 if (INTVAL (op) == NO_ROUND)
37770 redundant_embed_rnd = 1;
37772 else
37774 if (VECTOR_MODE_P (mode))
37775 op = safe_vector_operand (op, mode);
37777 op = fixup_modeless_constant (op, mode);
37779 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37781 if (optimize || !match)
37782 op = copy_to_mode_reg (mode, op);
37784 else
37786 op = copy_to_reg (op);
37787 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37791 args[i].op = op;
37792 args[i].mode = mode;
37795 switch (nargs)
37797 case 1:
37798 pat = GEN_FCN (icode) (target, args[0].op);
37799 break;
37800 case 2:
37801 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37802 break;
37803 case 3:
37804 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37805 args[2].op);
37806 break;
37807 case 4:
37808 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37809 args[2].op, args[3].op);
37810 break;
37811 case 5:
37812 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37813 args[2].op, args[3].op, args[4].op);
break;
37814 case 6:
37815 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37816 args[2].op, args[3].op, args[4].op,
37817 args[5].op);
37818 break;
37819 default:
37820 gcc_unreachable ();
37823 if (!pat)
37824 return 0;
37826 if (redundant_embed_rnd)
37827 pat = ix86_erase_embedded_rounding (pat);
37829 emit_insn (pat);
37830 return target;
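/* Illustration (not part of the original sources): a rounding builtin
   processed by the loop above, with the rounding immediate as the last
   operand.  Sketch, assuming <immintrin.h>:

     #include <immintrin.h>

     __m512d add_toward_zero (__m512d a, __m512d b)
     {
       // An explicit rounding mode keeps the embedded-rounding form;
       // _MM_FROUND_CUR_DIRECTION would set redundant_embed_rnd instead.
       return _mm512_add_round_pd (a, b,
                                   _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
     }
*/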
37833 /* Subroutine of ix86_expand_builtin to take care of special insns
37834 with variable number of operands. */
37836 static rtx
37837 ix86_expand_special_args_builtin (const struct builtin_description *d,
37838 tree exp, rtx target)
37840 tree arg;
37841 rtx pat, op;
37842 unsigned int i, nargs, arg_adjust, memory;
37843 bool aligned_mem = false;
37844 struct
37846 rtx op;
37847 machine_mode mode;
37848 } args[3];
37849 enum insn_code icode = d->icode;
37850 bool last_arg_constant = false;
37851 const struct insn_data_d *insn_p = &insn_data[icode];
37852 machine_mode tmode = insn_p->operand[0].mode;
37853 enum { load, store } klass;
37855 switch ((enum ix86_builtin_func_type) d->flag)
37857 case VOID_FTYPE_VOID:
37858 emit_insn (GEN_FCN (icode) (target));
37859 return 0;
37860 case VOID_FTYPE_UINT64:
37861 case VOID_FTYPE_UNSIGNED:
37862 nargs = 0;
37863 klass = store;
37864 memory = 0;
37865 break;
37867 case INT_FTYPE_VOID:
37868 case USHORT_FTYPE_VOID:
37869 case UINT64_FTYPE_VOID:
37870 case UNSIGNED_FTYPE_VOID:
37871 nargs = 0;
37872 klass = load;
37873 memory = 0;
37874 break;
37875 case UINT64_FTYPE_PUNSIGNED:
37876 case V2DI_FTYPE_PV2DI:
37877 case V4DI_FTYPE_PV4DI:
37878 case V32QI_FTYPE_PCCHAR:
37879 case V16QI_FTYPE_PCCHAR:
37880 case V8SF_FTYPE_PCV4SF:
37881 case V8SF_FTYPE_PCFLOAT:
37882 case V4SF_FTYPE_PCFLOAT:
37883 case V4DF_FTYPE_PCV2DF:
37884 case V4DF_FTYPE_PCDOUBLE:
37885 case V2DF_FTYPE_PCDOUBLE:
37886 case VOID_FTYPE_PVOID:
37887 case V16SI_FTYPE_PV4SI:
37888 case V16SF_FTYPE_PV4SF:
37889 case V8DI_FTYPE_PV4DI:
37890 case V8DI_FTYPE_PV8DI:
37891 case V8DF_FTYPE_PV4DF:
37892 nargs = 1;
37893 klass = load;
37894 memory = 0;
37895 switch (icode)
37897 case CODE_FOR_sse4_1_movntdqa:
37898 case CODE_FOR_avx2_movntdqa:
37899 case CODE_FOR_avx512f_movntdqa:
37900 aligned_mem = true;
37901 break;
37902 default:
37903 break;
37905 break;
37906 case VOID_FTYPE_PV2SF_V4SF:
37907 case VOID_FTYPE_PV8DI_V8DI:
37908 case VOID_FTYPE_PV4DI_V4DI:
37909 case VOID_FTYPE_PV2DI_V2DI:
37910 case VOID_FTYPE_PCHAR_V32QI:
37911 case VOID_FTYPE_PCHAR_V16QI:
37912 case VOID_FTYPE_PFLOAT_V16SF:
37913 case VOID_FTYPE_PFLOAT_V8SF:
37914 case VOID_FTYPE_PFLOAT_V4SF:
37915 case VOID_FTYPE_PDOUBLE_V8DF:
37916 case VOID_FTYPE_PDOUBLE_V4DF:
37917 case VOID_FTYPE_PDOUBLE_V2DF:
37918 case VOID_FTYPE_PLONGLONG_LONGLONG:
37919 case VOID_FTYPE_PULONGLONG_ULONGLONG:
37920 case VOID_FTYPE_PINT_INT:
37921 nargs = 1;
37922 klass = store;
37923 /* Reserve memory operand for target. */
37924 memory = ARRAY_SIZE (args);
37925 switch (icode)
37927 /* These builtins and instructions require the memory
37928 to be properly aligned. */
37929 case CODE_FOR_avx_movntv4di:
37930 case CODE_FOR_sse2_movntv2di:
37931 case CODE_FOR_avx_movntv8sf:
37932 case CODE_FOR_sse_movntv4sf:
37933 case CODE_FOR_sse4a_vmmovntv4sf:
37934 case CODE_FOR_avx_movntv4df:
37935 case CODE_FOR_sse2_movntv2df:
37936 case CODE_FOR_sse4a_vmmovntv2df:
37937 case CODE_FOR_sse2_movntidi:
37938 case CODE_FOR_sse_movntq:
37939 case CODE_FOR_sse2_movntisi:
37940 case CODE_FOR_avx512f_movntv16sf:
37941 case CODE_FOR_avx512f_movntv8df:
37942 case CODE_FOR_avx512f_movntv8di:
37943 aligned_mem = true;
37944 break;
37945 default:
37946 break;
37948 break;
37949 case V4SF_FTYPE_V4SF_PCV2SF:
37950 case V2DF_FTYPE_V2DF_PCDOUBLE:
37951 nargs = 2;
37952 klass = load;
37953 memory = 1;
37954 break;
37955 case V8SF_FTYPE_PCV8SF_V8SI:
37956 case V4DF_FTYPE_PCV4DF_V4DI:
37957 case V4SF_FTYPE_PCV4SF_V4SI:
37958 case V2DF_FTYPE_PCV2DF_V2DI:
37959 case V8SI_FTYPE_PCV8SI_V8SI:
37960 case V4DI_FTYPE_PCV4DI_V4DI:
37961 case V4SI_FTYPE_PCV4SI_V4SI:
37962 case V2DI_FTYPE_PCV2DI_V2DI:
37963 nargs = 2;
37964 klass = load;
37965 memory = 0;
37966 break;
37967 case VOID_FTYPE_PV8DF_V8DF_QI:
37968 case VOID_FTYPE_PV16SF_V16SF_HI:
37969 case VOID_FTYPE_PV8DI_V8DI_QI:
37970 case VOID_FTYPE_PV4DI_V4DI_QI:
37971 case VOID_FTYPE_PV2DI_V2DI_QI:
37972 case VOID_FTYPE_PV16SI_V16SI_HI:
37973 case VOID_FTYPE_PV8SI_V8SI_QI:
37974 case VOID_FTYPE_PV4SI_V4SI_QI:
37975 switch (icode)
37977 /* These builtins and instructions require the memory
37978 to be properly aligned. */
37979 case CODE_FOR_avx512f_storev16sf_mask:
37980 case CODE_FOR_avx512f_storev16si_mask:
37981 case CODE_FOR_avx512f_storev8df_mask:
37982 case CODE_FOR_avx512f_storev8di_mask:
37983 case CODE_FOR_avx512vl_storev8sf_mask:
37984 case CODE_FOR_avx512vl_storev8si_mask:
37985 case CODE_FOR_avx512vl_storev4df_mask:
37986 case CODE_FOR_avx512vl_storev4di_mask:
37987 case CODE_FOR_avx512vl_storev4sf_mask:
37988 case CODE_FOR_avx512vl_storev4si_mask:
37989 case CODE_FOR_avx512vl_storev2df_mask:
37990 case CODE_FOR_avx512vl_storev2di_mask:
37991 aligned_mem = true;
37992 break;
37993 default:
37994 break;
37996 /* FALLTHRU */
37997 case VOID_FTYPE_PV8SF_V8SI_V8SF:
37998 case VOID_FTYPE_PV4DF_V4DI_V4DF:
37999 case VOID_FTYPE_PV4SF_V4SI_V4SF:
38000 case VOID_FTYPE_PV2DF_V2DI_V2DF:
38001 case VOID_FTYPE_PV8SI_V8SI_V8SI:
38002 case VOID_FTYPE_PV4DI_V4DI_V4DI:
38003 case VOID_FTYPE_PV4SI_V4SI_V4SI:
38004 case VOID_FTYPE_PV2DI_V2DI_V2DI:
38005 case VOID_FTYPE_PDOUBLE_V2DF_QI:
38006 case VOID_FTYPE_PFLOAT_V4SF_QI:
38007 case VOID_FTYPE_PV8SI_V8DI_QI:
38008 case VOID_FTYPE_PV8HI_V8DI_QI:
38009 case VOID_FTYPE_PV16HI_V16SI_HI:
38010 case VOID_FTYPE_PV16QI_V8DI_QI:
38011 case VOID_FTYPE_PV16QI_V16SI_HI:
38012 case VOID_FTYPE_PV4SI_V4DI_QI:
38013 case VOID_FTYPE_PV4SI_V2DI_QI:
38014 case VOID_FTYPE_PV8HI_V4DI_QI:
38015 case VOID_FTYPE_PV8HI_V2DI_QI:
38016 case VOID_FTYPE_PV8HI_V8SI_QI:
38017 case VOID_FTYPE_PV8HI_V4SI_QI:
38018 case VOID_FTYPE_PV16QI_V4DI_QI:
38019 case VOID_FTYPE_PV16QI_V2DI_QI:
38020 case VOID_FTYPE_PV16QI_V8SI_QI:
38021 case VOID_FTYPE_PV16QI_V4SI_QI:
38022 case VOID_FTYPE_PV8HI_V8HI_QI:
38023 case VOID_FTYPE_PV16HI_V16HI_HI:
38024 case VOID_FTYPE_PV32HI_V32HI_SI:
38025 case VOID_FTYPE_PV16QI_V16QI_HI:
38026 case VOID_FTYPE_PV32QI_V32QI_SI:
38027 case VOID_FTYPE_PV64QI_V64QI_DI:
38028 case VOID_FTYPE_PV4DF_V4DF_QI:
38029 case VOID_FTYPE_PV2DF_V2DF_QI:
38030 case VOID_FTYPE_PV8SF_V8SF_QI:
38031 case VOID_FTYPE_PV4SF_V4SF_QI:
38032 nargs = 2;
38033 klass = store;
38034 /* Reserve memory operand for target. */
38035 memory = ARRAY_SIZE (args);
38036 break;
38037 case V4SF_FTYPE_PCV4SF_V4SF_QI:
38038 case V8SF_FTYPE_PCV8SF_V8SF_QI:
38039 case V16SF_FTYPE_PCV16SF_V16SF_HI:
38040 case V4SI_FTYPE_PCV4SI_V4SI_QI:
38041 case V8SI_FTYPE_PCV8SI_V8SI_QI:
38042 case V16SI_FTYPE_PCV16SI_V16SI_HI:
38043 case V2DF_FTYPE_PCV2DF_V2DF_QI:
38044 case V4DF_FTYPE_PCV4DF_V4DF_QI:
38045 case V8DF_FTYPE_PCV8DF_V8DF_QI:
38046 case V2DI_FTYPE_PCV2DI_V2DI_QI:
38047 case V4DI_FTYPE_PCV4DI_V4DI_QI:
38048 case V8DI_FTYPE_PCV8DI_V8DI_QI:
38049 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
38050 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
38051 case V8HI_FTYPE_PCV8HI_V8HI_QI:
38052 case V16HI_FTYPE_PCV16HI_V16HI_HI:
38053 case V32HI_FTYPE_PCV32HI_V32HI_SI:
38054 case V16QI_FTYPE_PCV16QI_V16QI_HI:
38055 case V32QI_FTYPE_PCV32QI_V32QI_SI:
38056 case V64QI_FTYPE_PCV64QI_V64QI_DI:
38057 nargs = 3;
38058 klass = load;
38059 memory = 0;
38060 switch (icode)
38062 /* These builtins and instructions require the memory
38063 to be properly aligned. */
38064 case CODE_FOR_avx512f_loadv16sf_mask:
38065 case CODE_FOR_avx512f_loadv16si_mask:
38066 case CODE_FOR_avx512f_loadv8df_mask:
38067 case CODE_FOR_avx512f_loadv8di_mask:
38068 case CODE_FOR_avx512vl_loadv8sf_mask:
38069 case CODE_FOR_avx512vl_loadv8si_mask:
38070 case CODE_FOR_avx512vl_loadv4df_mask:
38071 case CODE_FOR_avx512vl_loadv4di_mask:
38072 case CODE_FOR_avx512vl_loadv4sf_mask:
38073 case CODE_FOR_avx512vl_loadv4si_mask:
38074 case CODE_FOR_avx512vl_loadv2df_mask:
38075 case CODE_FOR_avx512vl_loadv2di_mask:
38076 case CODE_FOR_avx512bw_loadv64qi_mask:
38077 case CODE_FOR_avx512vl_loadv32qi_mask:
38078 case CODE_FOR_avx512vl_loadv16qi_mask:
38079 case CODE_FOR_avx512bw_loadv32hi_mask:
38080 case CODE_FOR_avx512vl_loadv16hi_mask:
38081 case CODE_FOR_avx512vl_loadv8hi_mask:
38082 aligned_mem = true;
38083 break;
38084 default:
38085 break;
38087 break;
38088 case VOID_FTYPE_UINT_UINT_UINT:
38089 case VOID_FTYPE_UINT64_UINT_UINT:
38090 case UCHAR_FTYPE_UINT_UINT_UINT:
38091 case UCHAR_FTYPE_UINT64_UINT_UINT:
38092 nargs = 3;
38093 klass = load;
38094 memory = ARRAY_SIZE (args);
38095 last_arg_constant = true;
38096 break;
38097 default:
38098 gcc_unreachable ();
38101 gcc_assert (nargs <= ARRAY_SIZE (args));
38103 if (klass == store)
38105 arg = CALL_EXPR_ARG (exp, 0);
38106 op = expand_normal (arg);
38107 gcc_assert (target == 0);
38108 if (memory)
38110 op = ix86_zero_extend_to_Pmode (op);
38111 target = gen_rtx_MEM (tmode, op);
38112 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38113 on it. Try to improve it using get_pointer_alignment,
38114 and if the special builtin is one that requires strict
38115 mode alignment, also from its GET_MODE_ALIGNMENT.
38116 Failure to do so could lead to ix86_legitimate_combined_insn
38117 rejecting all changes to such insns. */
38118 unsigned int align = get_pointer_alignment (arg);
38119 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38120 align = GET_MODE_ALIGNMENT (tmode);
38121 if (MEM_ALIGN (target) < align)
38122 set_mem_align (target, align);
38124 else
38125 target = force_reg (tmode, op);
38126 arg_adjust = 1;
38128 else
38130 arg_adjust = 0;
38131 if (optimize
38132 || target == 0
38133 || !register_operand (target, tmode)
38134 || GET_MODE (target) != tmode)
38135 target = gen_reg_rtx (tmode);
38138 for (i = 0; i < nargs; i++)
38140 machine_mode mode = insn_p->operand[i + 1].mode;
38141 bool match;
38143 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38144 op = expand_normal (arg);
38145 match = insn_p->operand[i + 1].predicate (op, mode);
38147 if (last_arg_constant && (i + 1) == nargs)
38149 if (!match)
38151 if (icode == CODE_FOR_lwp_lwpvalsi3
38152 || icode == CODE_FOR_lwp_lwpinssi3
38153 || icode == CODE_FOR_lwp_lwpvaldi3
38154 || icode == CODE_FOR_lwp_lwpinsdi3)
38155 error ("the last argument must be a 32-bit immediate");
38156 else
38157 error ("the last argument must be an 8-bit immediate");
38158 return const0_rtx;
38161 else
38163 if (i == memory)
38165 /* This must be the memory operand. */
38166 op = ix86_zero_extend_to_Pmode (op);
38167 op = gen_rtx_MEM (mode, op);
38168 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38169 on it. Try to improve it using get_pointer_alignment,
38170 and if the special builtin is one that requires strict
38171 mode alignment, also from its GET_MODE_ALIGNMENT.
38172 Failure to do so could lead to ix86_legitimate_combined_insn
38173 rejecting all changes to such insns. */
38174 unsigned int align = get_pointer_alignment (arg);
38175 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38176 align = GET_MODE_ALIGNMENT (mode);
38177 if (MEM_ALIGN (op) < align)
38178 set_mem_align (op, align);
38180 else
38182 /* This must be a register. */
38183 if (VECTOR_MODE_P (mode))
38184 op = safe_vector_operand (op, mode);
38186 op = fixup_modeless_constant (op, mode);
38188 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38189 op = copy_to_mode_reg (mode, op);
38190 else
38192 op = copy_to_reg (op);
38193 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38198 args[i].op = op;
38199 args[i].mode = mode;
38202 switch (nargs)
38204 case 0:
38205 pat = GEN_FCN (icode) (target);
38206 break;
38207 case 1:
38208 pat = GEN_FCN (icode) (target, args[0].op);
38209 break;
38210 case 2:
38211 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38212 break;
38213 case 3:
38214 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38215 break;
38216 default:
38217 gcc_unreachable ();
38220 if (! pat)
38221 return 0;
38222 emit_insn (pat);
38223 return klass == store ? 0 : target;
38226 /* Return the integer constant in ARG. Constrain it to be in the range
38227 of the subparts of VEC_TYPE; issue an error if not. */
38229 static int
38230 get_element_number (tree vec_type, tree arg)
38232 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
38234 if (!tree_fits_uhwi_p (arg)
38235 || (elt = tree_to_uhwi (arg), elt > max))
38237 error ("selector must be an integer constant in the range 0..%wi", max);
38238 return 0;
38241 return elt;
38244 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38245 ix86_expand_vector_init. We DO have language-level syntax for this, in
38246 the form of (type){ init-list }. Except that since we can't place emms
38247 instructions from inside the compiler, we can't allow the use of MMX
38248 registers unless the user explicitly asks for it. So we do *not* define
38249 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38250 we have builtins invoked by mmintrin.h that give us license to emit
38251 these sorts of instructions. */
38253 static rtx
38254 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38256 machine_mode tmode = TYPE_MODE (type);
38257 machine_mode inner_mode = GET_MODE_INNER (tmode);
38258 int i, n_elt = GET_MODE_NUNITS (tmode);
38259 rtvec v = rtvec_alloc (n_elt);
38261 gcc_assert (VECTOR_MODE_P (tmode));
38262 gcc_assert (call_expr_nargs (exp) == n_elt);
38264 for (i = 0; i < n_elt; ++i)
38266 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38267 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38270 if (!target || !register_operand (target, tmode))
38271 target = gen_reg_rtx (tmode);
38273 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38274 return target;
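/* Illustrative usage (the exact intrinsic-to-builtin mapping lives in
   mmintrin.h): a call such as _mm_set_pi16 (w3, w2, w1, w0) is
   typically implemented via __builtin_ia32_vec_init_v4hi and reaches
   the expander above through IX86_BUILTIN_VEC_INIT_V4HI, each scalar
   argument becoming one element of the PARALLEL passed to
   ix86_expand_vector_init.  */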
38277 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38278 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38279 had a language-level syntax for referencing vector elements. */
38281 static rtx
38282 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38284 machine_mode tmode, mode0;
38285 tree arg0, arg1;
38286 int elt;
38287 rtx op0;
38289 arg0 = CALL_EXPR_ARG (exp, 0);
38290 arg1 = CALL_EXPR_ARG (exp, 1);
38292 op0 = expand_normal (arg0);
38293 elt = get_element_number (TREE_TYPE (arg0), arg1);
38295 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38296 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38297 gcc_assert (VECTOR_MODE_P (mode0));
38299 op0 = force_reg (mode0, op0);
38301 if (optimize || !target || !register_operand (target, tmode))
38302 target = gen_reg_rtx (tmode);
38304 ix86_expand_vector_extract (true, target, op0, elt);
38306 return target;
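/* Illustrative usage: _mm_extract_epi16 (v, 2) from emmintrin.h is
   typically implemented via __builtin_ia32_vec_ext_v8hi and is routed
   to the expander above as IX86_BUILTIN_VEC_EXT_V8HI.  */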
38309 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38310 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38311 a language-level syntax for referencing vector elements. */
38313 static rtx
38314 ix86_expand_vec_set_builtin (tree exp)
38316 machine_mode tmode, mode1;
38317 tree arg0, arg1, arg2;
38318 int elt;
38319 rtx op0, op1, target;
38321 arg0 = CALL_EXPR_ARG (exp, 0);
38322 arg1 = CALL_EXPR_ARG (exp, 1);
38323 arg2 = CALL_EXPR_ARG (exp, 2);
38325 tmode = TYPE_MODE (TREE_TYPE (arg0));
38326 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38327 gcc_assert (VECTOR_MODE_P (tmode));
38329 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38330 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38331 elt = get_element_number (TREE_TYPE (arg0), arg2);
38333 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38334 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38336 op0 = force_reg (tmode, op0);
38337 op1 = force_reg (mode1, op1);
38339 /* OP0 is the source of these builtin functions and shouldn't be
38340 modified. Create a copy, use it and return it as target. */
38341 target = gen_reg_rtx (tmode);
38342 emit_move_insn (target, op0);
38343 ix86_expand_vector_set (true, target, op1, elt);
38345 return target;
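/* Illustrative usage: _mm_insert_epi16 (v, x, 2) from emmintrin.h is
   typically implemented via __builtin_ia32_vec_set_v8hi and expanded
   above; the source vector is copied into a fresh register first, so
   the user's operand is never modified in place.  */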
38348 /* Emit conditional move of SRC to DST with condition
38349 OP1 CODE OP2. */
38350 static void
38351 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38353 rtx t;
38355 if (TARGET_CMOVE)
38357 t = ix86_expand_compare (code, op1, op2);
38358 emit_insn (gen_rtx_SET (VOIDmode, dst,
38359 gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38360 src, dst)));
38362 else
38364 rtx nomove = gen_label_rtx ();
38365 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38366 const0_rtx, GET_MODE (op1), 1, nomove);
38367 emit_move_insn (dst, src);
38368 emit_label (nomove);
38372 /* Choose max of DST and SRC and put it to DST. */
38373 static void
38374 ix86_emit_move_max (rtx dst, rtx src)
38376 ix86_emit_cmove (dst, src, LTU, dst, src);
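/* The unsigned max is used for both bound halves in the MPX expanders
   below: lower bounds are compared directly, while upper bounds are
   kept in one's complement form, so taking the maximum of the
   complemented values selects the tighter upper bound.  */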
38379 /* Expand an expression EXP that calls a built-in function,
38380 with result going to TARGET if that's convenient
38381 (and in mode MODE if that's convenient).
38382 SUBTARGET may be used as the target for computing one of EXP's operands.
38383 IGNORE is nonzero if the value is to be ignored. */
38385 static rtx
38386 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38387 machine_mode mode, int ignore)
38389 const struct builtin_description *d;
38390 size_t i;
38391 enum insn_code icode;
38392 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38393 tree arg0, arg1, arg2, arg3, arg4;
38394 rtx op0, op1, op2, op3, op4, pat, insn;
38395 machine_mode mode0, mode1, mode2, mode3, mode4;
38396 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38398 /* For CPU builtins that can be folded, fold first and expand the fold. */
38399 switch (fcode)
38401 case IX86_BUILTIN_CPU_INIT:
38403 /* Make it call __cpu_indicator_init in libgcc. */
38404 tree call_expr, fndecl, type;
38405 type = build_function_type_list (integer_type_node, NULL_TREE);
38406 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38407 call_expr = build_call_expr (fndecl, 0);
38408 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38410 case IX86_BUILTIN_CPU_IS:
38411 case IX86_BUILTIN_CPU_SUPPORTS:
38413 tree arg0 = CALL_EXPR_ARG (exp, 0);
38414 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38415 gcc_assert (fold_expr != NULL_TREE);
38416 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38420 /* Determine whether the builtin function is available under the current ISA.
38421 Originally the builtin was not created if it wasn't applicable to the
38422 current ISA based on the command line switches. With function specific
38423 options, we need to check in the context of the function making the call
38424 whether it is supported. */
38425 if (ix86_builtins_isa[fcode].isa
38426 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38428 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38429 NULL, (enum fpmath_unit) 0, false);
38431 if (!opts)
38432 error ("%qE needs unknown isa option", fndecl);
38433 else
38435 gcc_assert (opts != NULL);
38436 error ("%qE needs isa option %s", fndecl, opts);
38437 free (opts);
38439 return const0_rtx;
38442 switch (fcode)
38444 case IX86_BUILTIN_BNDMK:
38445 if (!target
38446 || GET_MODE (target) != BNDmode
38447 || !register_operand (target, BNDmode))
38448 target = gen_reg_rtx (BNDmode);
38450 arg0 = CALL_EXPR_ARG (exp, 0);
38451 arg1 = CALL_EXPR_ARG (exp, 1);
38453 op0 = expand_normal (arg0);
38454 op1 = expand_normal (arg1);
38456 if (!register_operand (op0, Pmode))
38457 op0 = ix86_zero_extend_to_Pmode (op0);
38458 if (!register_operand (op1, Pmode))
38459 op1 = ix86_zero_extend_to_Pmode (op1);
38461 /* Builtin arg1 is the size of the block, but instruction op1 should
38462 be (size - 1). */
38463 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38464 NULL_RTX, 1, OPTAB_DIRECT);
38466 emit_insn (BNDmode == BND64mode
38467 ? gen_bnd64_mk (target, op0, op1)
38468 : gen_bnd32_mk (target, op0, op1));
38469 return target;
38471 case IX86_BUILTIN_BNDSTX:
38472 arg0 = CALL_EXPR_ARG (exp, 0);
38473 arg1 = CALL_EXPR_ARG (exp, 1);
38474 arg2 = CALL_EXPR_ARG (exp, 2);
38476 op0 = expand_normal (arg0);
38477 op1 = expand_normal (arg1);
38478 op2 = expand_normal (arg2);
38480 if (!register_operand (op0, Pmode))
38481 op0 = ix86_zero_extend_to_Pmode (op0);
38482 if (!register_operand (op1, BNDmode))
38483 op1 = copy_to_mode_reg (BNDmode, op1);
38484 if (!register_operand (op2, Pmode))
38485 op2 = ix86_zero_extend_to_Pmode (op2);
38487 emit_insn (BNDmode == BND64mode
38488 ? gen_bnd64_stx (op2, op0, op1)
38489 : gen_bnd32_stx (op2, op0, op1));
38490 return 0;
38492 case IX86_BUILTIN_BNDLDX:
38493 if (!target
38494 || GET_MODE (target) != BNDmode
38495 || !register_operand (target, BNDmode))
38496 target = gen_reg_rtx (BNDmode);
38498 arg0 = CALL_EXPR_ARG (exp, 0);
38499 arg1 = CALL_EXPR_ARG (exp, 1);
38501 op0 = expand_normal (arg0);
38502 op1 = expand_normal (arg1);
38504 if (!register_operand (op0, Pmode))
38505 op0 = ix86_zero_extend_to_Pmode (op0);
38506 if (!register_operand (op1, Pmode))
38507 op1 = ix86_zero_extend_to_Pmode (op1);
38509 emit_insn (BNDmode == BND64mode
38510 ? gen_bnd64_ldx (target, op0, op1)
38511 : gen_bnd32_ldx (target, op0, op1));
38512 return target;
38514 case IX86_BUILTIN_BNDCL:
38515 arg0 = CALL_EXPR_ARG (exp, 0);
38516 arg1 = CALL_EXPR_ARG (exp, 1);
38518 op0 = expand_normal (arg0);
38519 op1 = expand_normal (arg1);
38521 if (!register_operand (op0, Pmode))
38522 op0 = ix86_zero_extend_to_Pmode (op0);
38523 if (!register_operand (op1, BNDmode))
38524 op1 = copy_to_mode_reg (BNDmode, op1);
38526 emit_insn (BNDmode == BND64mode
38527 ? gen_bnd64_cl (op1, op0)
38528 : gen_bnd32_cl (op1, op0));
38529 return 0;
38531 case IX86_BUILTIN_BNDCU:
38532 arg0 = CALL_EXPR_ARG (exp, 0);
38533 arg1 = CALL_EXPR_ARG (exp, 1);
38535 op0 = expand_normal (arg0);
38536 op1 = expand_normal (arg1);
38538 if (!register_operand (op0, Pmode))
38539 op0 = ix86_zero_extend_to_Pmode (op0);
38540 if (!register_operand (op1, BNDmode))
38541 op1 = copy_to_mode_reg (BNDmode, op1);
38543 emit_insn (BNDmode == BND64mode
38544 ? gen_bnd64_cu (op1, op0)
38545 : gen_bnd32_cu (op1, op0));
38546 return 0;
38548 case IX86_BUILTIN_BNDRET:
38549 arg0 = CALL_EXPR_ARG (exp, 0);
38550 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38551 target = chkp_get_rtl_bounds (arg0);
38553 /* If no bounds were specified for the returned value,
38554 then use INIT bounds. This usually happens when
38555 some built-in function is expanded. */
38556 if (!target)
38558 rtx t1 = gen_reg_rtx (Pmode);
38559 rtx t2 = gen_reg_rtx (Pmode);
38560 target = gen_reg_rtx (BNDmode);
38561 emit_move_insn (t1, const0_rtx);
38562 emit_move_insn (t2, constm1_rtx);
38563 emit_insn (BNDmode == BND64mode
38564 ? gen_bnd64_mk (target, t1, t2)
38565 : gen_bnd32_mk (target, t1, t2));
38568 gcc_assert (target && REG_P (target));
38569 return target;
38571 case IX86_BUILTIN_BNDNARROW:
38573 rtx m1, m1h1, m1h2, lb, ub, t1;
38575 /* Return value and lb. */
38576 arg0 = CALL_EXPR_ARG (exp, 0);
38577 /* Bounds. */
38578 arg1 = CALL_EXPR_ARG (exp, 1);
38579 /* Size. */
38580 arg2 = CALL_EXPR_ARG (exp, 2);
38582 lb = expand_normal (arg0);
38583 op1 = expand_normal (arg1);
38584 op2 = expand_normal (arg2);
38586 /* The size was passed, but we need to use (size - 1), as for bndmk. */
38587 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38588 NULL_RTX, 1, OPTAB_DIRECT);
38590 /* Add LB to size and invert to get UB. */
38591 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38592 op2, 1, OPTAB_DIRECT);
38593 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
38595 if (!register_operand (lb, Pmode))
38596 lb = ix86_zero_extend_to_Pmode (lb);
38597 if (!register_operand (ub, Pmode))
38598 ub = ix86_zero_extend_to_Pmode (ub);
38600 /* We need to move bounds to memory before any computations. */
38601 if (MEM_P (op1))
38602 m1 = op1;
38603 else
38605 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38606 emit_move_insn (m1, op1);
38609 /* Generate mem expression to be used for access to LB and UB. */
38610 m1h1 = adjust_address (m1, Pmode, 0);
38611 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38613 t1 = gen_reg_rtx (Pmode);
38615 /* Compute LB. */
38616 emit_move_insn (t1, m1h1);
38617 ix86_emit_move_max (t1, lb);
38618 emit_move_insn (m1h1, t1);
38620 /* Compute UB. UB is stored in 1's complement form. Therefore
38621 we also use max here. */
38622 emit_move_insn (t1, m1h2);
38623 ix86_emit_move_max (t1, ub);
38624 emit_move_insn (m1h2, t1);
38626 op2 = gen_reg_rtx (BNDmode);
38627 emit_move_insn (op2, m1);
38629 return chkp_join_splitted_slot (lb, op2);
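/* Note for BNDNARROW above: the returned bounds are the intersection
   of the incoming bounds with [LB, LB + SIZE - 1].  Both halves are
   combined with an unsigned max; this also works for the upper bound
   because it is stored in one's complement form.  */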
38632 case IX86_BUILTIN_BNDINT:
38634 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38636 if (!target
38637 || GET_MODE (target) != BNDmode
38638 || !register_operand (target, BNDmode))
38639 target = gen_reg_rtx (BNDmode);
38641 arg0 = CALL_EXPR_ARG (exp, 0);
38642 arg1 = CALL_EXPR_ARG (exp, 1);
38644 op0 = expand_normal (arg0);
38645 op1 = expand_normal (arg1);
38647 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38648 rh1 = adjust_address (res, Pmode, 0);
38649 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38651 /* Put first bounds to temporaries. */
38652 lb1 = gen_reg_rtx (Pmode);
38653 ub1 = gen_reg_rtx (Pmode);
38654 if (MEM_P (op0))
38656 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38657 emit_move_insn (ub1, adjust_address (op0, Pmode,
38658 GET_MODE_SIZE (Pmode)));
38660 else
38662 emit_move_insn (res, op0);
38663 emit_move_insn (lb1, rh1);
38664 emit_move_insn (ub1, rh2);
38667 /* Put second bounds to temporaries. */
38668 lb2 = gen_reg_rtx (Pmode);
38669 ub2 = gen_reg_rtx (Pmode);
38670 if (MEM_P (op1))
38672 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38673 emit_move_insn (ub2, adjust_address (op1, Pmode,
38674 GET_MODE_SIZE (Pmode)));
38676 else
38678 emit_move_insn (res, op1);
38679 emit_move_insn (lb2, rh1);
38680 emit_move_insn (ub2, rh2);
38683 /* Compute LB. */
38684 ix86_emit_move_max (lb1, lb2);
38685 emit_move_insn (rh1, lb1);
38687 /* Compute UB. UB is stored in 1's complement form. Therefore
38688 we also use max here. */
38689 ix86_emit_move_max (ub1, ub2);
38690 emit_move_insn (rh2, ub1);
38692 emit_move_insn (target, res);
38694 return target;
38697 case IX86_BUILTIN_SIZEOF:
38699 tree name;
38700 rtx symbol;
38702 if (!target
38703 || GET_MODE (target) != Pmode
38704 || !register_operand (target, Pmode))
38705 target = gen_reg_rtx (Pmode);
38707 arg0 = CALL_EXPR_ARG (exp, 0);
38708 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38710 name = DECL_ASSEMBLER_NAME (arg0);
38711 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38713 emit_insn (Pmode == SImode
38714 ? gen_move_size_reloc_si (target, symbol)
38715 : gen_move_size_reloc_di (target, symbol));
38717 return target;
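/* Note for the SIZEOF case above: the object size is obtained via a
   size relocation against the variable's assembler name
   (gen_move_size_reloc_{si,di}) rather than being computed by the
   front end, so the value is resolved by the assembler/linker.  */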
38720 case IX86_BUILTIN_BNDLOWER:
38722 rtx mem, hmem;
38724 if (!target
38725 || GET_MODE (target) != Pmode
38726 || !register_operand (target, Pmode))
38727 target = gen_reg_rtx (Pmode);
38729 arg0 = CALL_EXPR_ARG (exp, 0);
38730 op0 = expand_normal (arg0);
38732 /* We need to move bounds to memory first. */
38733 if (MEM_P (op0))
38734 mem = op0;
38735 else
38737 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38738 emit_move_insn (mem, op0);
38741 /* Generate mem expression to access LB and load it. */
38742 hmem = adjust_address (mem, Pmode, 0);
38743 emit_move_insn (target, hmem);
38745 return target;
38748 case IX86_BUILTIN_BNDUPPER:
38750 rtx mem, hmem, res;
38752 if (!target
38753 || GET_MODE (target) != Pmode
38754 || !register_operand (target, Pmode))
38755 target = gen_reg_rtx (Pmode);
38757 arg0 = CALL_EXPR_ARG (exp, 0);
38758 op0 = expand_normal (arg0);
38760 /* We need to move bounds to memory first. */
38761 if (MEM_P (op0))
38762 mem = op0;
38763 else
38765 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38766 emit_move_insn (mem, op0);
38769 /* Generate mem expression to access UB. */
38770 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
38772 /* We need to invert all bits of UB. */
38773 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
38775 if (res != target)
38776 emit_move_insn (target, res);
38778 return target;
38781 case IX86_BUILTIN_MASKMOVQ:
38782 case IX86_BUILTIN_MASKMOVDQU:
38783 icode = (fcode == IX86_BUILTIN_MASKMOVQ
38784 ? CODE_FOR_mmx_maskmovq
38785 : CODE_FOR_sse2_maskmovdqu);
38786 /* Note the arg order is different from the operand order. */
38787 arg1 = CALL_EXPR_ARG (exp, 0);
38788 arg2 = CALL_EXPR_ARG (exp, 1);
38789 arg0 = CALL_EXPR_ARG (exp, 2);
38790 op0 = expand_normal (arg0);
38791 op1 = expand_normal (arg1);
38792 op2 = expand_normal (arg2);
38793 mode0 = insn_data[icode].operand[0].mode;
38794 mode1 = insn_data[icode].operand[1].mode;
38795 mode2 = insn_data[icode].operand[2].mode;
38797 op0 = ix86_zero_extend_to_Pmode (op0);
38798 op0 = gen_rtx_MEM (mode1, op0);
38800 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38801 op0 = copy_to_mode_reg (mode0, op0);
38802 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38803 op1 = copy_to_mode_reg (mode1, op1);
38804 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38805 op2 = copy_to_mode_reg (mode2, op2);
38806 pat = GEN_FCN (icode) (op0, op1, op2);
38807 if (! pat)
38808 return 0;
38809 emit_insn (pat);
38810 return 0;
38812 case IX86_BUILTIN_LDMXCSR:
38813 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
38814 target = assign_386_stack_local (SImode, SLOT_TEMP);
38815 emit_move_insn (target, op0);
38816 emit_insn (gen_sse_ldmxcsr (target));
38817 return 0;
38819 case IX86_BUILTIN_STMXCSR:
38820 target = assign_386_stack_local (SImode, SLOT_TEMP);
38821 emit_insn (gen_sse_stmxcsr (target));
38822 return copy_to_mode_reg (SImode, target);
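/* Illustrative usage: _mm_setcsr and _mm_getcsr from xmmintrin.h are
   typically implemented via __builtin_ia32_ldmxcsr and
   __builtin_ia32_stmxcsr, handled by the two cases above.  The MXCSR
   value is staged through a stack slot because ldmxcsr/stmxcsr only
   accept a memory operand.  */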
38824 case IX86_BUILTIN_CLFLUSH:
38825 arg0 = CALL_EXPR_ARG (exp, 0);
38826 op0 = expand_normal (arg0);
38827 icode = CODE_FOR_sse2_clflush;
38828 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38829 op0 = ix86_zero_extend_to_Pmode (op0);
38831 emit_insn (gen_sse2_clflush (op0));
38832 return 0;
38834 case IX86_BUILTIN_CLWB:
38835 arg0 = CALL_EXPR_ARG (exp, 0);
38836 op0 = expand_normal (arg0);
38837 icode = CODE_FOR_clwb;
38838 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38839 op0 = ix86_zero_extend_to_Pmode (op0);
38841 emit_insn (gen_clwb (op0));
38842 return 0;
38844 case IX86_BUILTIN_CLFLUSHOPT:
38845 arg0 = CALL_EXPR_ARG (exp, 0);
38846 op0 = expand_normal (arg0);
38847 icode = CODE_FOR_clflushopt;
38848 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38849 op0 = ix86_zero_extend_to_Pmode (op0);
38851 emit_insn (gen_clflushopt (op0));
38852 return 0;
38854 case IX86_BUILTIN_MONITOR:
38855 arg0 = CALL_EXPR_ARG (exp, 0);
38856 arg1 = CALL_EXPR_ARG (exp, 1);
38857 arg2 = CALL_EXPR_ARG (exp, 2);
38858 op0 = expand_normal (arg0);
38859 op1 = expand_normal (arg1);
38860 op2 = expand_normal (arg2);
38861 if (!REG_P (op0))
38862 op0 = ix86_zero_extend_to_Pmode (op0);
38863 if (!REG_P (op1))
38864 op1 = copy_to_mode_reg (SImode, op1);
38865 if (!REG_P (op2))
38866 op2 = copy_to_mode_reg (SImode, op2);
38867 emit_insn (ix86_gen_monitor (op0, op1, op2));
38868 return 0;
38870 case IX86_BUILTIN_MWAIT:
38871 arg0 = CALL_EXPR_ARG (exp, 0);
38872 arg1 = CALL_EXPR_ARG (exp, 1);
38873 op0 = expand_normal (arg0);
38874 op1 = expand_normal (arg1);
38875 if (!REG_P (op0))
38876 op0 = copy_to_mode_reg (SImode, op0);
38877 if (!REG_P (op1))
38878 op1 = copy_to_mode_reg (SImode, op1);
38879 emit_insn (gen_sse3_mwait (op0, op1));
38880 return 0;
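/* Illustrative usage: _mm_monitor (p, ext, hints) and _mm_mwait (ext,
   hints) from pmmintrin.h are typically implemented via
   __builtin_ia32_monitor and __builtin_ia32_mwait, expanded by the two
   cases above.  */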
38882 case IX86_BUILTIN_VEC_INIT_V2SI:
38883 case IX86_BUILTIN_VEC_INIT_V4HI:
38884 case IX86_BUILTIN_VEC_INIT_V8QI:
38885 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
38887 case IX86_BUILTIN_VEC_EXT_V2DF:
38888 case IX86_BUILTIN_VEC_EXT_V2DI:
38889 case IX86_BUILTIN_VEC_EXT_V4SF:
38890 case IX86_BUILTIN_VEC_EXT_V4SI:
38891 case IX86_BUILTIN_VEC_EXT_V8HI:
38892 case IX86_BUILTIN_VEC_EXT_V2SI:
38893 case IX86_BUILTIN_VEC_EXT_V4HI:
38894 case IX86_BUILTIN_VEC_EXT_V16QI:
38895 return ix86_expand_vec_ext_builtin (exp, target);
38897 case IX86_BUILTIN_VEC_SET_V2DI:
38898 case IX86_BUILTIN_VEC_SET_V4SF:
38899 case IX86_BUILTIN_VEC_SET_V4SI:
38900 case IX86_BUILTIN_VEC_SET_V8HI:
38901 case IX86_BUILTIN_VEC_SET_V4HI:
38902 case IX86_BUILTIN_VEC_SET_V16QI:
38903 return ix86_expand_vec_set_builtin (exp);
38905 case IX86_BUILTIN_INFQ:
38906 case IX86_BUILTIN_HUGE_VALQ:
38908 REAL_VALUE_TYPE inf;
38909 rtx tmp;
38911 real_inf (&inf);
38912 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
38914 tmp = validize_mem (force_const_mem (mode, tmp));
38916 if (target == 0)
38917 target = gen_reg_rtx (mode);
38919 emit_move_insn (target, tmp);
38920 return target;
38923 case IX86_BUILTIN_RDPMC:
38924 case IX86_BUILTIN_RDTSC:
38925 case IX86_BUILTIN_RDTSCP:
38927 op0 = gen_reg_rtx (DImode);
38928 op1 = gen_reg_rtx (DImode);
38930 if (fcode == IX86_BUILTIN_RDPMC)
38932 arg0 = CALL_EXPR_ARG (exp, 0);
38933 op2 = expand_normal (arg0);
38934 if (!register_operand (op2, SImode))
38935 op2 = copy_to_mode_reg (SImode, op2);
38937 insn = (TARGET_64BIT
38938 ? gen_rdpmc_rex64 (op0, op1, op2)
38939 : gen_rdpmc (op0, op2));
38940 emit_insn (insn);
38942 else if (fcode == IX86_BUILTIN_RDTSC)
38944 insn = (TARGET_64BIT
38945 ? gen_rdtsc_rex64 (op0, op1)
38946 : gen_rdtsc (op0));
38947 emit_insn (insn);
38949 else
38951 op2 = gen_reg_rtx (SImode);
38953 insn = (TARGET_64BIT
38954 ? gen_rdtscp_rex64 (op0, op1, op2)
38955 : gen_rdtscp (op0, op2));
38956 emit_insn (insn);
38958 arg0 = CALL_EXPR_ARG (exp, 0);
38959 op4 = expand_normal (arg0);
38960 if (!address_operand (op4, VOIDmode))
38962 op4 = convert_memory_address (Pmode, op4);
38963 op4 = copy_addr_to_reg (op4);
38965 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
38968 if (target == 0)
38970 /* mode is VOIDmode if __builtin_rd* has been called
38971 without an lhs. */
38972 if (mode == VOIDmode)
38973 return target;
38974 target = gen_reg_rtx (mode);
38977 if (TARGET_64BIT)
38979 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
38980 op1, 1, OPTAB_DIRECT);
38981 op0 = expand_simple_binop (DImode, IOR, op0, op1,
38982 op0, 1, OPTAB_DIRECT);
38985 emit_move_insn (target, op0);
38986 return target;
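/* Note for the RDPMC/RDTSC/RDTSCP cases above: the counter is read as
   two 32-bit halves; on 64-bit targets the high half is shifted left
   by 32 and IORed into the low half to form the DImode result.  For
   RDTSCP the auxiliary value is additionally stored through the
   pointer argument.  */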
38988 case IX86_BUILTIN_FXSAVE:
38989 case IX86_BUILTIN_FXRSTOR:
38990 case IX86_BUILTIN_FXSAVE64:
38991 case IX86_BUILTIN_FXRSTOR64:
38992 case IX86_BUILTIN_FNSTENV:
38993 case IX86_BUILTIN_FLDENV:
38994 mode0 = BLKmode;
38995 switch (fcode)
38997 case IX86_BUILTIN_FXSAVE:
38998 icode = CODE_FOR_fxsave;
38999 break;
39000 case IX86_BUILTIN_FXRSTOR:
39001 icode = CODE_FOR_fxrstor;
39002 break;
39003 case IX86_BUILTIN_FXSAVE64:
39004 icode = CODE_FOR_fxsave64;
39005 break;
39006 case IX86_BUILTIN_FXRSTOR64:
39007 icode = CODE_FOR_fxrstor64;
39008 break;
39009 case IX86_BUILTIN_FNSTENV:
39010 icode = CODE_FOR_fnstenv;
39011 break;
39012 case IX86_BUILTIN_FLDENV:
39013 icode = CODE_FOR_fldenv;
39014 break;
39015 default:
39016 gcc_unreachable ();
39019 arg0 = CALL_EXPR_ARG (exp, 0);
39020 op0 = expand_normal (arg0);
39022 if (!address_operand (op0, VOIDmode))
39024 op0 = convert_memory_address (Pmode, op0);
39025 op0 = copy_addr_to_reg (op0);
39027 op0 = gen_rtx_MEM (mode0, op0);
39029 pat = GEN_FCN (icode) (op0);
39030 if (pat)
39031 emit_insn (pat);
39032 return 0;
39034 case IX86_BUILTIN_XSAVE:
39035 case IX86_BUILTIN_XRSTOR:
39036 case IX86_BUILTIN_XSAVE64:
39037 case IX86_BUILTIN_XRSTOR64:
39038 case IX86_BUILTIN_XSAVEOPT:
39039 case IX86_BUILTIN_XSAVEOPT64:
39040 case IX86_BUILTIN_XSAVES:
39041 case IX86_BUILTIN_XRSTORS:
39042 case IX86_BUILTIN_XSAVES64:
39043 case IX86_BUILTIN_XRSTORS64:
39044 case IX86_BUILTIN_XSAVEC:
39045 case IX86_BUILTIN_XSAVEC64:
39046 arg0 = CALL_EXPR_ARG (exp, 0);
39047 arg1 = CALL_EXPR_ARG (exp, 1);
39048 op0 = expand_normal (arg0);
39049 op1 = expand_normal (arg1);
39051 if (!address_operand (op0, VOIDmode))
39053 op0 = convert_memory_address (Pmode, op0);
39054 op0 = copy_addr_to_reg (op0);
39056 op0 = gen_rtx_MEM (BLKmode, op0);
39058 op1 = force_reg (DImode, op1);
39060 if (TARGET_64BIT)
39062 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
39063 NULL, 1, OPTAB_DIRECT);
39064 switch (fcode)
39066 case IX86_BUILTIN_XSAVE:
39067 icode = CODE_FOR_xsave_rex64;
39068 break;
39069 case IX86_BUILTIN_XRSTOR:
39070 icode = CODE_FOR_xrstor_rex64;
39071 break;
39072 case IX86_BUILTIN_XSAVE64:
39073 icode = CODE_FOR_xsave64;
39074 break;
39075 case IX86_BUILTIN_XRSTOR64:
39076 icode = CODE_FOR_xrstor64;
39077 break;
39078 case IX86_BUILTIN_XSAVEOPT:
39079 icode = CODE_FOR_xsaveopt_rex64;
39080 break;
39081 case IX86_BUILTIN_XSAVEOPT64:
39082 icode = CODE_FOR_xsaveopt64;
39083 break;
39084 case IX86_BUILTIN_XSAVES:
39085 icode = CODE_FOR_xsaves_rex64;
39086 break;
39087 case IX86_BUILTIN_XRSTORS:
39088 icode = CODE_FOR_xrstors_rex64;
39089 break;
39090 case IX86_BUILTIN_XSAVES64:
39091 icode = CODE_FOR_xsaves64;
39092 break;
39093 case IX86_BUILTIN_XRSTORS64:
39094 icode = CODE_FOR_xrstors64;
39095 break;
39096 case IX86_BUILTIN_XSAVEC:
39097 icode = CODE_FOR_xsavec_rex64;
39098 break;
39099 case IX86_BUILTIN_XSAVEC64:
39100 icode = CODE_FOR_xsavec64;
39101 break;
39102 default:
39103 gcc_unreachable ();
39106 op2 = gen_lowpart (SImode, op2);
39107 op1 = gen_lowpart (SImode, op1);
39108 pat = GEN_FCN (icode) (op0, op1, op2);
39110 else
39112 switch (fcode)
39114 case IX86_BUILTIN_XSAVE:
39115 icode = CODE_FOR_xsave;
39116 break;
39117 case IX86_BUILTIN_XRSTOR:
39118 icode = CODE_FOR_xrstor;
39119 break;
39120 case IX86_BUILTIN_XSAVEOPT:
39121 icode = CODE_FOR_xsaveopt;
39122 break;
39123 case IX86_BUILTIN_XSAVES:
39124 icode = CODE_FOR_xsaves;
39125 break;
39126 case IX86_BUILTIN_XRSTORS:
39127 icode = CODE_FOR_xrstors;
39128 break;
39129 case IX86_BUILTIN_XSAVEC:
39130 icode = CODE_FOR_xsavec;
39131 break;
39132 default:
39133 gcc_unreachable ();
39135 pat = GEN_FCN (icode) (op0, op1);
39138 if (pat)
39139 emit_insn (pat);
39140 return 0;
39142 case IX86_BUILTIN_LLWPCB:
39143 arg0 = CALL_EXPR_ARG (exp, 0);
39144 op0 = expand_normal (arg0);
39145 icode = CODE_FOR_lwp_llwpcb;
39146 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39147 op0 = ix86_zero_extend_to_Pmode (op0);
39148 emit_insn (gen_lwp_llwpcb (op0));
39149 return 0;
39151 case IX86_BUILTIN_SLWPCB:
39152 icode = CODE_FOR_lwp_slwpcb;
39153 if (!target
39154 || !insn_data[icode].operand[0].predicate (target, Pmode))
39155 target = gen_reg_rtx (Pmode);
39156 emit_insn (gen_lwp_slwpcb (target));
39157 return target;
39159 case IX86_BUILTIN_BEXTRI32:
39160 case IX86_BUILTIN_BEXTRI64:
39161 arg0 = CALL_EXPR_ARG (exp, 0);
39162 arg1 = CALL_EXPR_ARG (exp, 1);
39163 op0 = expand_normal (arg0);
39164 op1 = expand_normal (arg1);
39165 icode = (fcode == IX86_BUILTIN_BEXTRI32
39166 ? CODE_FOR_tbm_bextri_si
39167 : CODE_FOR_tbm_bextri_di);
39168 if (!CONST_INT_P (op1))
39170 error ("last argument must be an immediate");
39171 return const0_rtx;
39173 else
39175 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39176 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39177 op1 = GEN_INT (length);
39178 op2 = GEN_INT (lsb_index);
39179 pat = GEN_FCN (icode) (target, op0, op1, op2);
39180 if (pat)
39181 emit_insn (pat);
39182 return target;
39185 case IX86_BUILTIN_RDRAND16_STEP:
39186 icode = CODE_FOR_rdrandhi_1;
39187 mode0 = HImode;
39188 goto rdrand_step;
39190 case IX86_BUILTIN_RDRAND32_STEP:
39191 icode = CODE_FOR_rdrandsi_1;
39192 mode0 = SImode;
39193 goto rdrand_step;
39195 case IX86_BUILTIN_RDRAND64_STEP:
39196 icode = CODE_FOR_rdranddi_1;
39197 mode0 = DImode;
39199 rdrand_step:
39200 op0 = gen_reg_rtx (mode0);
39201 emit_insn (GEN_FCN (icode) (op0));
39203 arg0 = CALL_EXPR_ARG (exp, 0);
39204 op1 = expand_normal (arg0);
39205 if (!address_operand (op1, VOIDmode))
39207 op1 = convert_memory_address (Pmode, op1);
39208 op1 = copy_addr_to_reg (op1);
39210 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39212 op1 = gen_reg_rtx (SImode);
39213 emit_move_insn (op1, CONST1_RTX (SImode));
39215 /* Emit SImode conditional move. */
39216 if (mode0 == HImode)
39218 op2 = gen_reg_rtx (SImode);
39219 emit_insn (gen_zero_extendhisi2 (op2, op0));
39221 else if (mode0 == SImode)
39222 op2 = op0;
39223 else
39224 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39226 if (target == 0
39227 || !register_operand (target, SImode))
39228 target = gen_reg_rtx (SImode);
39230 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39231 const0_rtx);
39232 emit_insn (gen_rtx_SET (VOIDmode, target,
39233 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39234 return target;
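/* Illustrative usage: _rdrand32_step (&val) is typically implemented
   via __builtin_ia32_rdrand32_step and expanded above: the generated
   value is stored through the pointer argument, and a conditional
   move keyed on the carry flag produces the 0/1 success result.  */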
39236 case IX86_BUILTIN_RDSEED16_STEP:
39237 icode = CODE_FOR_rdseedhi_1;
39238 mode0 = HImode;
39239 goto rdseed_step;
39241 case IX86_BUILTIN_RDSEED32_STEP:
39242 icode = CODE_FOR_rdseedsi_1;
39243 mode0 = SImode;
39244 goto rdseed_step;
39246 case IX86_BUILTIN_RDSEED64_STEP:
39247 icode = CODE_FOR_rdseeddi_1;
39248 mode0 = DImode;
39250 rdseed_step:
39251 op0 = gen_reg_rtx (mode0);
39252 emit_insn (GEN_FCN (icode) (op0));
39254 arg0 = CALL_EXPR_ARG (exp, 0);
39255 op1 = expand_normal (arg0);
39256 if (!address_operand (op1, VOIDmode))
39258 op1 = convert_memory_address (Pmode, op1);
39259 op1 = copy_addr_to_reg (op1);
39261 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39263 op2 = gen_reg_rtx (QImode);
39265 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39266 const0_rtx);
39267 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
39269 if (target == 0
39270 || !register_operand (target, SImode))
39271 target = gen_reg_rtx (SImode);
39273 emit_insn (gen_zero_extendqisi2 (target, op2));
39274 return target;
39276 case IX86_BUILTIN_SBB32:
39277 icode = CODE_FOR_subsi3_carry;
39278 mode0 = SImode;
39279 goto addcarryx;
39281 case IX86_BUILTIN_SBB64:
39282 icode = CODE_FOR_subdi3_carry;
39283 mode0 = DImode;
39284 goto addcarryx;
39286 case IX86_BUILTIN_ADDCARRYX32:
39287 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39288 mode0 = SImode;
39289 goto addcarryx;
39291 case IX86_BUILTIN_ADDCARRYX64:
39292 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39293 mode0 = DImode;
39295 addcarryx:
39296 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39297 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39298 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39299 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39301 op0 = gen_reg_rtx (QImode);
39303 /* Generate CF from input operand. */
39304 op1 = expand_normal (arg0);
39305 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39306 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
39308 /* Generate the carry-using add/sub (ADCX/ADC/SBB) instruction to compute X+Y+CF or X-Y-CF. */
39309 op2 = expand_normal (arg1);
39310 op3 = expand_normal (arg2);
39312 if (!REG_P (op2))
39313 op2 = copy_to_mode_reg (mode0, op2);
39314 if (!REG_P (op3))
39315 op3 = copy_to_mode_reg (mode0, op3);
39317 op0 = gen_reg_rtx (mode0);
39319 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39320 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39321 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39323 /* Store the result. */
39324 op4 = expand_normal (arg3);
39325 if (!address_operand (op4, VOIDmode))
39327 op4 = convert_memory_address (Pmode, op4);
39328 op4 = copy_addr_to_reg (op4);
39330 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39332 /* Return current CF value. */
39333 if (target == 0)
39334 target = gen_reg_rtx (QImode);
39336 PUT_MODE (pat, QImode);
39337 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
39338 return target;
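/* Note for the SBB/ADDCARRYX cases above: the incoming carry C_IN is
   materialized by adding -1 to its QImode value (any nonzero C_IN sets
   CF), the carry-consuming add/sub pattern computes the result, the
   low word is stored through the output pointer, and the new carry is
   returned via the LTU test of the flags register held in PAT.  */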
39340 case IX86_BUILTIN_READ_FLAGS:
39341 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39343 if (optimize
39344 || target == NULL_RTX
39345 || !nonimmediate_operand (target, word_mode)
39346 || GET_MODE (target) != word_mode)
39347 target = gen_reg_rtx (word_mode);
39349 emit_insn (gen_pop (target));
39350 return target;
39352 case IX86_BUILTIN_WRITE_FLAGS:
39354 arg0 = CALL_EXPR_ARG (exp, 0);
39355 op0 = expand_normal (arg0);
39356 if (!general_no_elim_operand (op0, word_mode))
39357 op0 = copy_to_mode_reg (word_mode, op0);
39359 emit_insn (gen_push (op0));
39360 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39361 return 0;
39363 case IX86_BUILTIN_KORTESTC16:
39364 icode = CODE_FOR_kortestchi;
39365 mode0 = HImode;
39366 mode1 = CCCmode;
39367 goto kortest;
39369 case IX86_BUILTIN_KORTESTZ16:
39370 icode = CODE_FOR_kortestzhi;
39371 mode0 = HImode;
39372 mode1 = CCZmode;
39374 kortest:
39375 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39376 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39377 op0 = expand_normal (arg0);
39378 op1 = expand_normal (arg1);
39380 op0 = copy_to_reg (op0);
39381 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39382 op1 = copy_to_reg (op1);
39383 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39385 target = gen_reg_rtx (QImode);
39386 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
39388 /* Emit kortest. */
39389 emit_insn (GEN_FCN (icode) (op0, op1));
39390 /* And use setcc to return result from flags. */
39391 ix86_expand_setcc (target, EQ,
39392 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39393 return target;
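/* Note for the KORTEST cases above: both mask operands are forced into
   HImode values, kortest{c,z}hi sets the flags, and setcc materializes
   the selected flag (CCCmode for kortestc, CCZmode for kortestz) as
   the QImode 0/1 result.  */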
39395 case IX86_BUILTIN_GATHERSIV2DF:
39396 icode = CODE_FOR_avx2_gathersiv2df;
39397 goto gather_gen;
39398 case IX86_BUILTIN_GATHERSIV4DF:
39399 icode = CODE_FOR_avx2_gathersiv4df;
39400 goto gather_gen;
39401 case IX86_BUILTIN_GATHERDIV2DF:
39402 icode = CODE_FOR_avx2_gatherdiv2df;
39403 goto gather_gen;
39404 case IX86_BUILTIN_GATHERDIV4DF:
39405 icode = CODE_FOR_avx2_gatherdiv4df;
39406 goto gather_gen;
39407 case IX86_BUILTIN_GATHERSIV4SF:
39408 icode = CODE_FOR_avx2_gathersiv4sf;
39409 goto gather_gen;
39410 case IX86_BUILTIN_GATHERSIV8SF:
39411 icode = CODE_FOR_avx2_gathersiv8sf;
39412 goto gather_gen;
39413 case IX86_BUILTIN_GATHERDIV4SF:
39414 icode = CODE_FOR_avx2_gatherdiv4sf;
39415 goto gather_gen;
39416 case IX86_BUILTIN_GATHERDIV8SF:
39417 icode = CODE_FOR_avx2_gatherdiv8sf;
39418 goto gather_gen;
39419 case IX86_BUILTIN_GATHERSIV2DI:
39420 icode = CODE_FOR_avx2_gathersiv2di;
39421 goto gather_gen;
39422 case IX86_BUILTIN_GATHERSIV4DI:
39423 icode = CODE_FOR_avx2_gathersiv4di;
39424 goto gather_gen;
39425 case IX86_BUILTIN_GATHERDIV2DI:
39426 icode = CODE_FOR_avx2_gatherdiv2di;
39427 goto gather_gen;
39428 case IX86_BUILTIN_GATHERDIV4DI:
39429 icode = CODE_FOR_avx2_gatherdiv4di;
39430 goto gather_gen;
39431 case IX86_BUILTIN_GATHERSIV4SI:
39432 icode = CODE_FOR_avx2_gathersiv4si;
39433 goto gather_gen;
39434 case IX86_BUILTIN_GATHERSIV8SI:
39435 icode = CODE_FOR_avx2_gathersiv8si;
39436 goto gather_gen;
39437 case IX86_BUILTIN_GATHERDIV4SI:
39438 icode = CODE_FOR_avx2_gatherdiv4si;
39439 goto gather_gen;
39440 case IX86_BUILTIN_GATHERDIV8SI:
39441 icode = CODE_FOR_avx2_gatherdiv8si;
39442 goto gather_gen;
39443 case IX86_BUILTIN_GATHERALTSIV4DF:
39444 icode = CODE_FOR_avx2_gathersiv4df;
39445 goto gather_gen;
39446 case IX86_BUILTIN_GATHERALTDIV8SF:
39447 icode = CODE_FOR_avx2_gatherdiv8sf;
39448 goto gather_gen;
39449 case IX86_BUILTIN_GATHERALTSIV4DI:
39450 icode = CODE_FOR_avx2_gathersiv4di;
39451 goto gather_gen;
39452 case IX86_BUILTIN_GATHERALTDIV8SI:
39453 icode = CODE_FOR_avx2_gatherdiv8si;
39454 goto gather_gen;
39455 case IX86_BUILTIN_GATHER3SIV16SF:
39456 icode = CODE_FOR_avx512f_gathersiv16sf;
39457 goto gather_gen;
39458 case IX86_BUILTIN_GATHER3SIV8DF:
39459 icode = CODE_FOR_avx512f_gathersiv8df;
39460 goto gather_gen;
39461 case IX86_BUILTIN_GATHER3DIV16SF:
39462 icode = CODE_FOR_avx512f_gatherdiv16sf;
39463 goto gather_gen;
39464 case IX86_BUILTIN_GATHER3DIV8DF:
39465 icode = CODE_FOR_avx512f_gatherdiv8df;
39466 goto gather_gen;
39467 case IX86_BUILTIN_GATHER3SIV16SI:
39468 icode = CODE_FOR_avx512f_gathersiv16si;
39469 goto gather_gen;
39470 case IX86_BUILTIN_GATHER3SIV8DI:
39471 icode = CODE_FOR_avx512f_gathersiv8di;
39472 goto gather_gen;
39473 case IX86_BUILTIN_GATHER3DIV16SI:
39474 icode = CODE_FOR_avx512f_gatherdiv16si;
39475 goto gather_gen;
39476 case IX86_BUILTIN_GATHER3DIV8DI:
39477 icode = CODE_FOR_avx512f_gatherdiv8di;
39478 goto gather_gen;
39479 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39480 icode = CODE_FOR_avx512f_gathersiv8df;
39481 goto gather_gen;
39482 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39483 icode = CODE_FOR_avx512f_gatherdiv16sf;
39484 goto gather_gen;
39485 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39486 icode = CODE_FOR_avx512f_gathersiv8di;
39487 goto gather_gen;
39488 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39489 icode = CODE_FOR_avx512f_gatherdiv16si;
39490 goto gather_gen;
39491 case IX86_BUILTIN_GATHER3SIV2DF:
39492 icode = CODE_FOR_avx512vl_gathersiv2df;
39493 goto gather_gen;
39494 case IX86_BUILTIN_GATHER3SIV4DF:
39495 icode = CODE_FOR_avx512vl_gathersiv4df;
39496 goto gather_gen;
39497 case IX86_BUILTIN_GATHER3DIV2DF:
39498 icode = CODE_FOR_avx512vl_gatherdiv2df;
39499 goto gather_gen;
39500 case IX86_BUILTIN_GATHER3DIV4DF:
39501 icode = CODE_FOR_avx512vl_gatherdiv4df;
39502 goto gather_gen;
39503 case IX86_BUILTIN_GATHER3SIV4SF:
39504 icode = CODE_FOR_avx512vl_gathersiv4sf;
39505 goto gather_gen;
39506 case IX86_BUILTIN_GATHER3SIV8SF:
39507 icode = CODE_FOR_avx512vl_gathersiv8sf;
39508 goto gather_gen;
39509 case IX86_BUILTIN_GATHER3DIV4SF:
39510 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39511 goto gather_gen;
39512 case IX86_BUILTIN_GATHER3DIV8SF:
39513 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39514 goto gather_gen;
39515 case IX86_BUILTIN_GATHER3SIV2DI:
39516 icode = CODE_FOR_avx512vl_gathersiv2di;
39517 goto gather_gen;
39518 case IX86_BUILTIN_GATHER3SIV4DI:
39519 icode = CODE_FOR_avx512vl_gathersiv4di;
39520 goto gather_gen;
39521 case IX86_BUILTIN_GATHER3DIV2DI:
39522 icode = CODE_FOR_avx512vl_gatherdiv2di;
39523 goto gather_gen;
39524 case IX86_BUILTIN_GATHER3DIV4DI:
39525 icode = CODE_FOR_avx512vl_gatherdiv4di;
39526 goto gather_gen;
39527 case IX86_BUILTIN_GATHER3SIV4SI:
39528 icode = CODE_FOR_avx512vl_gathersiv4si;
39529 goto gather_gen;
39530 case IX86_BUILTIN_GATHER3SIV8SI:
39531 icode = CODE_FOR_avx512vl_gathersiv8si;
39532 goto gather_gen;
39533 case IX86_BUILTIN_GATHER3DIV4SI:
39534 icode = CODE_FOR_avx512vl_gatherdiv4si;
39535 goto gather_gen;
39536 case IX86_BUILTIN_GATHER3DIV8SI:
39537 icode = CODE_FOR_avx512vl_gatherdiv8si;
39538 goto gather_gen;
39539 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39540 icode = CODE_FOR_avx512vl_gathersiv4df;
39541 goto gather_gen;
39542 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39543 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39544 goto gather_gen;
39545 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39546 icode = CODE_FOR_avx512vl_gathersiv4di;
39547 goto gather_gen;
39548 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39549 icode = CODE_FOR_avx512vl_gatherdiv8si;
39550 goto gather_gen;
39551 case IX86_BUILTIN_SCATTERSIV16SF:
39552 icode = CODE_FOR_avx512f_scattersiv16sf;
39553 goto scatter_gen;
39554 case IX86_BUILTIN_SCATTERSIV8DF:
39555 icode = CODE_FOR_avx512f_scattersiv8df;
39556 goto scatter_gen;
39557 case IX86_BUILTIN_SCATTERDIV16SF:
39558 icode = CODE_FOR_avx512f_scatterdiv16sf;
39559 goto scatter_gen;
39560 case IX86_BUILTIN_SCATTERDIV8DF:
39561 icode = CODE_FOR_avx512f_scatterdiv8df;
39562 goto scatter_gen;
39563 case IX86_BUILTIN_SCATTERSIV16SI:
39564 icode = CODE_FOR_avx512f_scattersiv16si;
39565 goto scatter_gen;
39566 case IX86_BUILTIN_SCATTERSIV8DI:
39567 icode = CODE_FOR_avx512f_scattersiv8di;
39568 goto scatter_gen;
39569 case IX86_BUILTIN_SCATTERDIV16SI:
39570 icode = CODE_FOR_avx512f_scatterdiv16si;
39571 goto scatter_gen;
39572 case IX86_BUILTIN_SCATTERDIV8DI:
39573 icode = CODE_FOR_avx512f_scatterdiv8di;
39574 goto scatter_gen;
39575 case IX86_BUILTIN_SCATTERSIV8SF:
39576 icode = CODE_FOR_avx512vl_scattersiv8sf;
39577 goto scatter_gen;
39578 case IX86_BUILTIN_SCATTERSIV4SF:
39579 icode = CODE_FOR_avx512vl_scattersiv4sf;
39580 goto scatter_gen;
39581 case IX86_BUILTIN_SCATTERSIV4DF:
39582 icode = CODE_FOR_avx512vl_scattersiv4df;
39583 goto scatter_gen;
39584 case IX86_BUILTIN_SCATTERSIV2DF:
39585 icode = CODE_FOR_avx512vl_scattersiv2df;
39586 goto scatter_gen;
39587 case IX86_BUILTIN_SCATTERDIV8SF:
39588 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39589 goto scatter_gen;
39590 case IX86_BUILTIN_SCATTERDIV4SF:
39591 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39592 goto scatter_gen;
39593 case IX86_BUILTIN_SCATTERDIV4DF:
39594 icode = CODE_FOR_avx512vl_scatterdiv4df;
39595 goto scatter_gen;
39596 case IX86_BUILTIN_SCATTERDIV2DF:
39597 icode = CODE_FOR_avx512vl_scatterdiv2df;
39598 goto scatter_gen;
39599 case IX86_BUILTIN_SCATTERSIV8SI:
39600 icode = CODE_FOR_avx512vl_scattersiv8si;
39601 goto scatter_gen;
39602 case IX86_BUILTIN_SCATTERSIV4SI:
39603 icode = CODE_FOR_avx512vl_scattersiv4si;
39604 goto scatter_gen;
39605 case IX86_BUILTIN_SCATTERSIV4DI:
39606 icode = CODE_FOR_avx512vl_scattersiv4di;
39607 goto scatter_gen;
39608 case IX86_BUILTIN_SCATTERSIV2DI:
39609 icode = CODE_FOR_avx512vl_scattersiv2di;
39610 goto scatter_gen;
39611 case IX86_BUILTIN_SCATTERDIV8SI:
39612 icode = CODE_FOR_avx512vl_scatterdiv8si;
39613 goto scatter_gen;
39614 case IX86_BUILTIN_SCATTERDIV4SI:
39615 icode = CODE_FOR_avx512vl_scatterdiv4si;
39616 goto scatter_gen;
39617 case IX86_BUILTIN_SCATTERDIV4DI:
39618 icode = CODE_FOR_avx512vl_scatterdiv4di;
39619 goto scatter_gen;
39620 case IX86_BUILTIN_SCATTERDIV2DI:
39621 icode = CODE_FOR_avx512vl_scatterdiv2di;
39622 goto scatter_gen;
39623 case IX86_BUILTIN_GATHERPFDPD:
39624 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39625 goto vec_prefetch_gen;
39626 case IX86_BUILTIN_GATHERPFDPS:
39627 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39628 goto vec_prefetch_gen;
39629 case IX86_BUILTIN_GATHERPFQPD:
39630 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39631 goto vec_prefetch_gen;
39632 case IX86_BUILTIN_GATHERPFQPS:
39633 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39634 goto vec_prefetch_gen;
39635 case IX86_BUILTIN_SCATTERPFDPD:
39636 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39637 goto vec_prefetch_gen;
39638 case IX86_BUILTIN_SCATTERPFDPS:
39639 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39640 goto vec_prefetch_gen;
39641 case IX86_BUILTIN_SCATTERPFQPD:
39642 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39643 goto vec_prefetch_gen;
39644 case IX86_BUILTIN_SCATTERPFQPS:
39645 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39646 goto vec_prefetch_gen;
39648 gather_gen:
39649 rtx half;
39650 rtx (*gen) (rtx, rtx);
39652 arg0 = CALL_EXPR_ARG (exp, 0);
39653 arg1 = CALL_EXPR_ARG (exp, 1);
39654 arg2 = CALL_EXPR_ARG (exp, 2);
39655 arg3 = CALL_EXPR_ARG (exp, 3);
39656 arg4 = CALL_EXPR_ARG (exp, 4);
39657 op0 = expand_normal (arg0);
39658 op1 = expand_normal (arg1);
39659 op2 = expand_normal (arg2);
39660 op3 = expand_normal (arg3);
39661 op4 = expand_normal (arg4);
39662 /* Note the arg order is different from the operand order. */
39663 mode0 = insn_data[icode].operand[1].mode;
39664 mode2 = insn_data[icode].operand[3].mode;
39665 mode3 = insn_data[icode].operand[4].mode;
39666 mode4 = insn_data[icode].operand[5].mode;
39668 if (target == NULL_RTX
39669 || GET_MODE (target) != insn_data[icode].operand[0].mode
39670 || !insn_data[icode].operand[0].predicate (target,
39671 GET_MODE (target)))
39672 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39673 else
39674 subtarget = target;
39676 switch (fcode)
39678 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39679 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39680 half = gen_reg_rtx (V8SImode);
39681 if (!nonimmediate_operand (op2, V16SImode))
39682 op2 = copy_to_mode_reg (V16SImode, op2);
39683 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39684 op2 = half;
39685 break;
39686 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39687 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39688 case IX86_BUILTIN_GATHERALTSIV4DF:
39689 case IX86_BUILTIN_GATHERALTSIV4DI:
39690 half = gen_reg_rtx (V4SImode);
39691 if (!nonimmediate_operand (op2, V8SImode))
39692 op2 = copy_to_mode_reg (V8SImode, op2);
39693 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39694 op2 = half;
39695 break;
39696 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39697 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39698 half = gen_reg_rtx (mode0);
39699 if (mode0 == V8SFmode)
39700 gen = gen_vec_extract_lo_v16sf;
39701 else
39702 gen = gen_vec_extract_lo_v16si;
39703 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39704 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39705 emit_insn (gen (half, op0));
39706 op0 = half;
39707 if (GET_MODE (op3) != VOIDmode)
39709 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39710 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39711 emit_insn (gen (half, op3));
39712 op3 = half;
39714 break;
39715 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39716 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39717 case IX86_BUILTIN_GATHERALTDIV8SF:
39718 case IX86_BUILTIN_GATHERALTDIV8SI:
39719 half = gen_reg_rtx (mode0);
39720 if (mode0 == V4SFmode)
39721 gen = gen_vec_extract_lo_v8sf;
39722 else
39723 gen = gen_vec_extract_lo_v8si;
39724 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39725 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39726 emit_insn (gen (half, op0));
39727 op0 = half;
39728 if (GET_MODE (op3) != VOIDmode)
39730 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39731 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39732 emit_insn (gen (half, op3));
39733 op3 = half;
39735 break;
39736 default:
39737 break;
39740 /* Force the memory operand to use only a base register here; we
39741 don't want to do this for the memory operand of other builtin
39742 functions. */
39743 op1 = ix86_zero_extend_to_Pmode (op1);
39745 if (!insn_data[icode].operand[1].predicate (op0, mode0))
39746 op0 = copy_to_mode_reg (mode0, op0);
39747 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
39748 op1 = copy_to_mode_reg (Pmode, op1);
39749 if (!insn_data[icode].operand[3].predicate (op2, mode2))
39750 op2 = copy_to_mode_reg (mode2, op2);
39752 op3 = fixup_modeless_constant (op3, mode3);
39754 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
39756 if (!insn_data[icode].operand[4].predicate (op3, mode3))
39757 op3 = copy_to_mode_reg (mode3, op3);
39759 else
39761 op3 = copy_to_reg (op3);
39762 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
39764 if (!insn_data[icode].operand[5].predicate (op4, mode4))
39766 error ("the last argument must be scale 1, 2, 4, 8");
39767 return const0_rtx;
39770 /* Optimize. If mask is known to have all high bits set,
39771 replace op0 with pc_rtx to signal that the instruction
39772 overwrites the whole destination and doesn't use its
39773 previous contents. */
39774 if (optimize)
39776 if (TREE_CODE (arg3) == INTEGER_CST)
39778 if (integer_all_onesp (arg3))
39779 op0 = pc_rtx;
39781 else if (TREE_CODE (arg3) == VECTOR_CST)
39783 unsigned int negative = 0;
39784 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
39786 tree cst = VECTOR_CST_ELT (arg3, i);
39787 if (TREE_CODE (cst) == INTEGER_CST
39788 && tree_int_cst_sign_bit (cst))
39789 negative++;
39790 else if (TREE_CODE (cst) == REAL_CST
39791 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
39792 negative++;
39794 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
39795 op0 = pc_rtx;
39797 else if (TREE_CODE (arg3) == SSA_NAME
39798 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
39800 /* Recognize also when mask is like:
39801 __v2df src = _mm_setzero_pd ();
39802 __v2df mask = _mm_cmpeq_pd (src, src);
39804 __v8sf src = _mm256_setzero_ps ();
39805 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
39806 as that is a cheaper way to load all ones into
39807 a register than having to load a constant from
39808 memory. */
39809 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
39810 if (is_gimple_call (def_stmt))
39812 tree fndecl = gimple_call_fndecl (def_stmt);
39813 if (fndecl
39814 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
39815 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
39817 case IX86_BUILTIN_CMPPD:
39818 case IX86_BUILTIN_CMPPS:
39819 case IX86_BUILTIN_CMPPD256:
39820 case IX86_BUILTIN_CMPPS256:
39821 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
39822 break;
39823 /* FALLTHRU */
39824 case IX86_BUILTIN_CMPEQPD:
39825 case IX86_BUILTIN_CMPEQPS:
39826 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
39827 && initializer_zerop (gimple_call_arg (def_stmt,
39828 1)))
39829 op0 = pc_rtx;
39830 break;
39831 default:
39832 break;
39838 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
39839 if (! pat)
39840 return const0_rtx;
39841 emit_insn (pat);
39843 switch (fcode)
39845 case IX86_BUILTIN_GATHER3DIV16SF:
39846 if (target == NULL_RTX)
39847 target = gen_reg_rtx (V8SFmode);
39848 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
39849 break;
39850 case IX86_BUILTIN_GATHER3DIV16SI:
39851 if (target == NULL_RTX)
39852 target = gen_reg_rtx (V8SImode);
39853 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
39854 break;
39855 case IX86_BUILTIN_GATHER3DIV8SF:
39856 case IX86_BUILTIN_GATHERDIV8SF:
39857 if (target == NULL_RTX)
39858 target = gen_reg_rtx (V4SFmode);
39859 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
39860 break;
39861 case IX86_BUILTIN_GATHER3DIV8SI:
39862 case IX86_BUILTIN_GATHERDIV8SI:
39863 if (target == NULL_RTX)
39864 target = gen_reg_rtx (V4SImode);
39865 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
39866 break;
39867 default:
39868 target = subtarget;
39869 break;
39871 return target;
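/* Note for the gather expansion above: for the DIV variants whose
   index vector has more elements than the data vector, the instruction
   fills only the low half of SUBTARGET, so the final switch extracts
   that low half into a vector of the width the builtin actually
   returns.  */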
39873 scatter_gen:
39874 arg0 = CALL_EXPR_ARG (exp, 0);
39875 arg1 = CALL_EXPR_ARG (exp, 1);
39876 arg2 = CALL_EXPR_ARG (exp, 2);
39877 arg3 = CALL_EXPR_ARG (exp, 3);
39878 arg4 = CALL_EXPR_ARG (exp, 4);
39879 op0 = expand_normal (arg0);
39880 op1 = expand_normal (arg1);
39881 op2 = expand_normal (arg2);
39882 op3 = expand_normal (arg3);
39883 op4 = expand_normal (arg4);
39884 mode1 = insn_data[icode].operand[1].mode;
39885 mode2 = insn_data[icode].operand[2].mode;
39886 mode3 = insn_data[icode].operand[3].mode;
39887 mode4 = insn_data[icode].operand[4].mode;
39889 /* Force the memory operand to use only a base register here; we
39890 don't want to do this for the memory operand of other builtin
39891 functions. */
39892 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
39894 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39895 op0 = copy_to_mode_reg (Pmode, op0);
39897 op1 = fixup_modeless_constant (op1, mode1);
39899 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
39901 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39902 op1 = copy_to_mode_reg (mode1, op1);
39904 else
39906 op1 = copy_to_reg (op1);
39907 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
39910 if (!insn_data[icode].operand[2].predicate (op2, mode2))
39911 op2 = copy_to_mode_reg (mode2, op2);
39913 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39914 op3 = copy_to_mode_reg (mode3, op3);
39916 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39918 error ("the last argument must be scale 1, 2, 4, 8");
39919 return const0_rtx;
39922 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39923 if (! pat)
39924 return const0_rtx;
39926 emit_insn (pat);
39927 return 0;
39929 vec_prefetch_gen:
39930 arg0 = CALL_EXPR_ARG (exp, 0);
39931 arg1 = CALL_EXPR_ARG (exp, 1);
39932 arg2 = CALL_EXPR_ARG (exp, 2);
39933 arg3 = CALL_EXPR_ARG (exp, 3);
39934 arg4 = CALL_EXPR_ARG (exp, 4);
39935 op0 = expand_normal (arg0);
39936 op1 = expand_normal (arg1);
39937 op2 = expand_normal (arg2);
39938 op3 = expand_normal (arg3);
39939 op4 = expand_normal (arg4);
39940 mode0 = insn_data[icode].operand[0].mode;
39941 mode1 = insn_data[icode].operand[1].mode;
39942 mode3 = insn_data[icode].operand[3].mode;
39943 mode4 = insn_data[icode].operand[4].mode;
39945 op0 = fixup_modeless_constant (op0, mode0);
39947 if (GET_MODE (op0) == mode0
39948 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
39950 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39951 op0 = copy_to_mode_reg (mode0, op0);
39953 else if (op0 != constm1_rtx)
39955 op0 = copy_to_reg (op0);
39956 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39959 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39960 op1 = copy_to_mode_reg (mode1, op1);
39962 /* Force the memory operand to use only a base register here; we
39963 don't want to do this for the memory operand of other builtin
39964 functions. */
39965 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
39967 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
39968 op2 = copy_to_mode_reg (Pmode, op2);
39970 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39972 error ("the forth argument must be scale 1, 2, 4, 8");
39973 return const0_rtx;
39976 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39978 error ("incorrect hint operand");
39979 return const0_rtx;
39982 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39983 if (! pat)
39984 return const0_rtx;
39986 emit_insn (pat);
39988 return 0;
39990 case IX86_BUILTIN_XABORT:
39991 icode = CODE_FOR_xabort;
39992 arg0 = CALL_EXPR_ARG (exp, 0);
39993 op0 = expand_normal (arg0);
39994 mode0 = insn_data[icode].operand[0].mode;
39995 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39997 error ("the xabort's argument must be an 8-bit immediate");
39998 return const0_rtx;
40000 emit_insn (gen_xabort (op0));
40001 return 0;
40003 default:
40004 break;
40007 for (i = 0, d = bdesc_special_args;
40008 i < ARRAY_SIZE (bdesc_special_args);
40009 i++, d++)
40010 if (d->code == fcode)
40011 return ix86_expand_special_args_builtin (d, exp, target);
40013 for (i = 0, d = bdesc_args;
40014 i < ARRAY_SIZE (bdesc_args);
40015 i++, d++)
40016 if (d->code == fcode)
40017 switch (fcode)
40019 case IX86_BUILTIN_FABSQ:
40020 case IX86_BUILTIN_COPYSIGNQ:
40021 if (!TARGET_SSE)
40022 /* Emit a normal call if SSE isn't available. */
40023 return expand_call (exp, target, ignore);
40024 default:
40025 return ix86_expand_args_builtin (d, exp, target);
40028 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
40029 if (d->code == fcode)
40030 return ix86_expand_sse_comi (d, exp, target);
40032 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
40033 if (d->code == fcode)
40034 return ix86_expand_round_builtin (d, exp, target);
40036 for (i = 0, d = bdesc_pcmpestr;
40037 i < ARRAY_SIZE (bdesc_pcmpestr);
40038 i++, d++)
40039 if (d->code == fcode)
40040 return ix86_expand_sse_pcmpestr (d, exp, target);
40042 for (i = 0, d = bdesc_pcmpistr;
40043 i < ARRAY_SIZE (bdesc_pcmpistr);
40044 i++, d++)
40045 if (d->code == fcode)
40046 return ix86_expand_sse_pcmpistr (d, exp, target);
40048 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
40049 if (d->code == fcode)
40050 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
40051 (enum ix86_builtin_func_type)
40052 d->flag, d->comparison);
40054 gcc_unreachable ();
40057 /* This returns the target-specific builtin with code CODE if
40058 current_function_decl has visibility on this builtin, which is checked
40059 using isa flags. Returns NULL_TREE otherwise. */
40061 static tree ix86_get_builtin (enum ix86_builtins code)
40063 struct cl_target_option *opts;
40064 tree target_tree = NULL_TREE;
40066 /* Determine the isa flags of current_function_decl. */
40068 if (current_function_decl)
40069 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
40071 if (target_tree == NULL)
40072 target_tree = target_option_default_node;
40074 opts = TREE_TARGET_OPTION (target_tree);
40076 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40077 return ix86_builtin_decl (code, true);
40078 else
40079 return NULL_TREE;
40082 /* Return the function decl for the target-specific builtin
40083 corresponding to the MPX builtin passed in FCODE. */
40084 static tree
40085 ix86_builtin_mpx_function (unsigned fcode)
40087 switch (fcode)
40089 case BUILT_IN_CHKP_BNDMK:
40090 return ix86_builtins[IX86_BUILTIN_BNDMK];
40092 case BUILT_IN_CHKP_BNDSTX:
40093 return ix86_builtins[IX86_BUILTIN_BNDSTX];
40095 case BUILT_IN_CHKP_BNDLDX:
40096 return ix86_builtins[IX86_BUILTIN_BNDLDX];
40098 case BUILT_IN_CHKP_BNDCL:
40099 return ix86_builtins[IX86_BUILTIN_BNDCL];
40101 case BUILT_IN_CHKP_BNDCU:
40102 return ix86_builtins[IX86_BUILTIN_BNDCU];
40104 case BUILT_IN_CHKP_BNDRET:
40105 return ix86_builtins[IX86_BUILTIN_BNDRET];
40107 case BUILT_IN_CHKP_INTERSECT:
40108 return ix86_builtins[IX86_BUILTIN_BNDINT];
40110 case BUILT_IN_CHKP_NARROW:
40111 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40113 case BUILT_IN_CHKP_SIZEOF:
40114 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40116 case BUILT_IN_CHKP_EXTRACT_LOWER:
40117 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40119 case BUILT_IN_CHKP_EXTRACT_UPPER:
40120 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40122 default:
40123 return NULL_TREE;
40126 gcc_unreachable ();
40129 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40131 Return an address to be used to load/store bounds for pointer
40132 passed in SLOT.
40134 SLOT_NO is an integer constant holding the number of a target-
40135 dependent special slot to be used in case SLOT is not a memory.
40137 SPECIAL_BASE is a pointer to be used as the base of fake addresses
40138 for accessing special slots in the Bounds Table. SPECIAL_BASE[-1],
40139 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
40141 static rtx
40142 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40144 rtx addr = NULL;
40146 /* A NULL slot means we pass bounds for a pointer that is not passed
40147 to the function at all. A register slot means we pass the pointer
40148 in a register. In both cases bounds are passed via the Bounds
40149 Table. Since we do not have the actual pointer stored in memory,
40150 we have to use fake addresses to access the Bounds Table. We
40151 start with (special_base - sizeof (void *)) and decrease this
40152 address by the pointer size to get addresses for the other slots. */
40153 if (!slot || REG_P (slot))
40155 gcc_assert (CONST_INT_P (slot_no));
40156 addr = plus_constant (Pmode, special_base,
40157 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40159 /* If pointer is passed in a memory then its address is used to
40160 access Bounds Table. */
40161 else if (MEM_P (slot))
40163 addr = XEXP (slot, 0);
40164 if (!register_operand (addr, Pmode))
40165 addr = copy_addr_to_reg (addr);
40167 else
40168 gcc_unreachable ();
40170 return addr;
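/* Worked example, assuming Pmode == DImode (so GET_MODE_SIZE (Pmode)
   is 8): a register-passed pointer with slot_no 0 gets the fake
   address special_base - 8, slot_no 1 gets special_base - 16, and so
   on, matching the SPECIAL_BASE[-1], SPECIAL_BASE[-2] scheme above. */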
40173 /* Expand pass uses this hook to load bounds for function parameter
40174 PTR passed in SLOT in case its bounds are not passed in a register.
40176 If SLOT is a memory, then bounds are loaded as for regular pointer
40177 loaded from memory. PTR may be NULL in case SLOT is a memory.
40178 In such case value of PTR (if required) may be loaded from SLOT.
40180 If SLOT is NULL or a register then SLOT_NO is an integer constant
40181 holding number of the target dependent special slot which should be
40182 used to obtain bounds.
40184 Return loaded bounds. */
40186 static rtx
40187 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40189 rtx reg = gen_reg_rtx (BNDmode);
40190 rtx addr;
40192 /* Get address to be used to access Bounds Table. Special slots start
40193 at the location of return address of the current function. */
40194 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40196 /* Load pointer value from a memory if we don't have it. */
40197 if (!ptr)
40199 gcc_assert (MEM_P (slot));
40200 ptr = copy_addr_to_reg (slot);
40203 emit_insn (BNDmode == BND64mode
40204 ? gen_bnd64_ldx (reg, addr, ptr)
40205 : gen_bnd32_ldx (reg, addr, ptr));
40207 return reg;
40210 /* Expand pass uses this hook to store BOUNDS for call argument PTR
40211 passed in SLOT in case BOUNDS are not passed in a register.
40213 If SLOT is a memory, then BOUNDS are stored as for regular pointer
40214 stored in memory. PTR may be NULL in case SLOT is a memory.
40215 In such case value of PTR (if required) may be loaded from SLOT.
40217 If SLOT is NULL or a register then SLOT_NO is an integer constant
40218 holding number of the target dependent special slot which should be
40219 used to store BOUNDS. */
40221 static void
40222 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40224 rtx addr;
40226 /* Get address to be used to access Bounds Table. Special slots start
40227 at the location of return address of a called function. */
40228 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40230 /* Load pointer value from a memory if we don't have it. */
40231 if (!ptr)
40233 gcc_assert (MEM_P (slot));
40234 ptr = copy_addr_to_reg (slot);
40237 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40238 if (!register_operand (bounds, BNDmode))
40239 bounds = copy_to_mode_reg (BNDmode, bounds);
40241 emit_insn (BNDmode == BND64mode
40242 ? gen_bnd64_stx (addr, ptr, bounds)
40243 : gen_bnd32_stx (addr, ptr, bounds));
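/* In both ix86_load_bounds and ix86_store_bounds the generated
   bnd64_ldx/bnd32_ldx and bnd64_stx/bnd32_stx patterns correspond to
   the MPX bndldx/bndstx instructions, which index the Bounds Table by
   the address computed in ix86_get_arg_address_for_bt together with
   the pointer value itself. */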
40246 /* Load and return bounds returned by function in SLOT. */
40248 static rtx
40249 ix86_load_returned_bounds (rtx slot)
40251 rtx res;
40253 gcc_assert (REG_P (slot));
40254 res = gen_reg_rtx (BNDmode);
40255 emit_move_insn (res, slot);
40257 return res;
40260 /* Store BOUNDS returned by function into SLOT. */
40262 static void
40263 ix86_store_returned_bounds (rtx slot, rtx bounds)
40265 gcc_assert (REG_P (slot));
40266 emit_move_insn (slot, bounds);
40269 /* Returns a function decl for a vectorized version of the builtin function
40270 FNDECL, with result vector type TYPE_OUT and argument vector type TYPE_IN,
40271 or NULL_TREE if it is not available. */
40273 static tree
40274 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40275 tree type_in)
40277 machine_mode in_mode, out_mode;
40278 int in_n, out_n;
40279 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40281 if (TREE_CODE (type_out) != VECTOR_TYPE
40282 || TREE_CODE (type_in) != VECTOR_TYPE
40283 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40284 return NULL_TREE;
40286 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40287 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40288 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40289 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40291 switch (fn)
40293 case BUILT_IN_SQRT:
40294 if (out_mode == DFmode && in_mode == DFmode)
40296 if (out_n == 2 && in_n == 2)
40297 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40298 else if (out_n == 4 && in_n == 4)
40299 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40300 else if (out_n == 8 && in_n == 8)
40301 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40303 break;
40305 case BUILT_IN_EXP2F:
40306 if (out_mode == SFmode && in_mode == SFmode)
40308 if (out_n == 16 && in_n == 16)
40309 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40311 break;
40313 case BUILT_IN_SQRTF:
40314 if (out_mode == SFmode && in_mode == SFmode)
40316 if (out_n == 4 && in_n == 4)
40317 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40318 else if (out_n == 8 && in_n == 8)
40319 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40320 else if (out_n == 16 && in_n == 16)
40321 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40323 break;
40325 case BUILT_IN_IFLOOR:
40326 case BUILT_IN_LFLOOR:
40327 case BUILT_IN_LLFLOOR:
40328 /* The round insn does not trap on denormals. */
40329 if (flag_trapping_math || !TARGET_ROUND)
40330 break;
40332 if (out_mode == SImode && in_mode == DFmode)
40334 if (out_n == 4 && in_n == 2)
40335 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40336 else if (out_n == 8 && in_n == 4)
40337 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40338 else if (out_n == 16 && in_n == 8)
40339 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40341 break;
40343 case BUILT_IN_IFLOORF:
40344 case BUILT_IN_LFLOORF:
40345 case BUILT_IN_LLFLOORF:
40346 /* The round insn does not trap on denormals. */
40347 if (flag_trapping_math || !TARGET_ROUND)
40348 break;
40350 if (out_mode == SImode && in_mode == SFmode)
40352 if (out_n == 4 && in_n == 4)
40353 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40354 else if (out_n == 8 && in_n == 8)
40355 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40357 break;
40359 case BUILT_IN_ICEIL:
40360 case BUILT_IN_LCEIL:
40361 case BUILT_IN_LLCEIL:
40362 /* The round insn does not trap on denormals. */
40363 if (flag_trapping_math || !TARGET_ROUND)
40364 break;
40366 if (out_mode == SImode && in_mode == DFmode)
40368 if (out_n == 4 && in_n == 2)
40369 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40370 else if (out_n == 8 && in_n == 4)
40371 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40372 else if (out_n == 16 && in_n == 8)
40373 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40375 break;
40377 case BUILT_IN_ICEILF:
40378 case BUILT_IN_LCEILF:
40379 case BUILT_IN_LLCEILF:
40380 /* The round insn does not trap on denormals. */
40381 if (flag_trapping_math || !TARGET_ROUND)
40382 break;
40384 if (out_mode == SImode && in_mode == SFmode)
40386 if (out_n == 4 && in_n == 4)
40387 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40388 else if (out_n == 8 && in_n == 8)
40389 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40391 break;
40393 case BUILT_IN_IRINT:
40394 case BUILT_IN_LRINT:
40395 case BUILT_IN_LLRINT:
40396 if (out_mode == SImode && in_mode == DFmode)
40398 if (out_n == 4 && in_n == 2)
40399 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40400 else if (out_n == 8 && in_n == 4)
40401 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40403 break;
40405 case BUILT_IN_IRINTF:
40406 case BUILT_IN_LRINTF:
40407 case BUILT_IN_LLRINTF:
40408 if (out_mode == SImode && in_mode == SFmode)
40410 if (out_n == 4 && in_n == 4)
40411 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40412 else if (out_n == 8 && in_n == 8)
40413 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40415 break;
40417 case BUILT_IN_IROUND:
40418 case BUILT_IN_LROUND:
40419 case BUILT_IN_LLROUND:
40420 /* The round insn does not trap on denormals. */
40421 if (flag_trapping_math || !TARGET_ROUND)
40422 break;
40424 if (out_mode == SImode && in_mode == DFmode)
40426 if (out_n == 4 && in_n == 2)
40427 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40428 else if (out_n == 8 && in_n == 4)
40429 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40430 else if (out_n == 16 && in_n == 8)
40431 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40433 break;
40435 case BUILT_IN_IROUNDF:
40436 case BUILT_IN_LROUNDF:
40437 case BUILT_IN_LLROUNDF:
40438 /* The round insn does not trap on denormals. */
40439 if (flag_trapping_math || !TARGET_ROUND)
40440 break;
40442 if (out_mode == SImode && in_mode == SFmode)
40444 if (out_n == 4 && in_n == 4)
40445 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40446 else if (out_n == 8 && in_n == 8)
40447 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40449 break;
40451 case BUILT_IN_COPYSIGN:
40452 if (out_mode == DFmode && in_mode == DFmode)
40454 if (out_n == 2 && in_n == 2)
40455 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40456 else if (out_n == 4 && in_n == 4)
40457 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40458 else if (out_n == 8 && in_n == 8)
40459 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40461 break;
40463 case BUILT_IN_COPYSIGNF:
40464 if (out_mode == SFmode && in_mode == SFmode)
40466 if (out_n == 4 && in_n == 4)
40467 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40468 else if (out_n == 8 && in_n == 8)
40469 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40470 else if (out_n == 16 && in_n == 16)
40471 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40473 break;
40475 case BUILT_IN_FLOOR:
40476 /* The round insn does not trap on denormals. */
40477 if (flag_trapping_math || !TARGET_ROUND)
40478 break;
40480 if (out_mode == DFmode && in_mode == DFmode)
40482 if (out_n == 2 && in_n == 2)
40483 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40484 else if (out_n == 4 && in_n == 4)
40485 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40487 break;
40489 case BUILT_IN_FLOORF:
40490 /* The round insn does not trap on denormals. */
40491 if (flag_trapping_math || !TARGET_ROUND)
40492 break;
40494 if (out_mode == SFmode && in_mode == SFmode)
40496 if (out_n == 4 && in_n == 4)
40497 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40498 else if (out_n == 8 && in_n == 8)
40499 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40501 break;
40503 case BUILT_IN_CEIL:
40504 /* The round insn does not trap on denormals. */
40505 if (flag_trapping_math || !TARGET_ROUND)
40506 break;
40508 if (out_mode == DFmode && in_mode == DFmode)
40510 if (out_n == 2 && in_n == 2)
40511 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40512 else if (out_n == 4 && in_n == 4)
40513 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40515 break;
40517 case BUILT_IN_CEILF:
40518 /* The round insn does not trap on denormals. */
40519 if (flag_trapping_math || !TARGET_ROUND)
40520 break;
40522 if (out_mode == SFmode && in_mode == SFmode)
40524 if (out_n == 4 && in_n == 4)
40525 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40526 else if (out_n == 8 && in_n == 8)
40527 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40529 break;
40531 case BUILT_IN_TRUNC:
40532 /* The round insn does not trap on denormals. */
40533 if (flag_trapping_math || !TARGET_ROUND)
40534 break;
40536 if (out_mode == DFmode && in_mode == DFmode)
40538 if (out_n == 2 && in_n == 2)
40539 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40540 else if (out_n == 4 && in_n == 4)
40541 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40543 break;
40545 case BUILT_IN_TRUNCF:
40546 /* The round insn does not trap on denormals. */
40547 if (flag_trapping_math || !TARGET_ROUND)
40548 break;
40550 if (out_mode == SFmode && in_mode == SFmode)
40552 if (out_n == 4 && in_n == 4)
40553 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40554 else if (out_n == 8 && in_n == 8)
40555 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40557 break;
40559 case BUILT_IN_RINT:
40560 /* The round insn does not trap on denormals. */
40561 if (flag_trapping_math || !TARGET_ROUND)
40562 break;
40564 if (out_mode == DFmode && in_mode == DFmode)
40566 if (out_n == 2 && in_n == 2)
40567 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40568 else if (out_n == 4 && in_n == 4)
40569 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40571 break;
40573 case BUILT_IN_RINTF:
40574 /* The round insn does not trap on denormals. */
40575 if (flag_trapping_math || !TARGET_ROUND)
40576 break;
40578 if (out_mode == SFmode && in_mode == SFmode)
40580 if (out_n == 4 && in_n == 4)
40581 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40582 else if (out_n == 8 && in_n == 8)
40583 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40585 break;
40587 case BUILT_IN_ROUND:
40588 /* The round insn does not trap on denormals. */
40589 if (flag_trapping_math || !TARGET_ROUND)
40590 break;
40592 if (out_mode == DFmode && in_mode == DFmode)
40594 if (out_n == 2 && in_n == 2)
40595 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40596 else if (out_n == 4 && in_n == 4)
40597 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40599 break;
40601 case BUILT_IN_ROUNDF:
40602 /* The round insn does not trap on denormals. */
40603 if (flag_trapping_math || !TARGET_ROUND)
40604 break;
40606 if (out_mode == SFmode && in_mode == SFmode)
40608 if (out_n == 4 && in_n == 4)
40609 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40610 else if (out_n == 8 && in_n == 8)
40611 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40613 break;
40615 case BUILT_IN_FMA:
40616 if (out_mode == DFmode && in_mode == DFmode)
40618 if (out_n == 2 && in_n == 2)
40619 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40620 if (out_n == 4 && in_n == 4)
40621 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40623 break;
40625 case BUILT_IN_FMAF:
40626 if (out_mode == SFmode && in_mode == SFmode)
40628 if (out_n == 4 && in_n == 4)
40629 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40630 if (out_n == 8 && in_n == 8)
40631 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40633 break;
40635 default:
40636 break;
40639 /* Dispatch to a handler for a vectorization library. */
40640 if (ix86_veclib_handler)
40641 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40642 type_in);
40644 return NULL_TREE;
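/* For example, a loop computing sqrt () over doubles that the
   vectorizer wants to widen to V4DF (out_n == in_n == 4) maps
   BUILT_IN_SQRT to IX86_BUILTIN_SQRTPD256 above, provided
   ix86_get_builtin confirms the required ISA (here AVX) is enabled
   for the current function. */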
40647 /* Handler for an SVML-style interface to
40648 a library with vectorized intrinsics. */
40650 static tree
40651 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40653 char name[20];
40654 tree fntype, new_fndecl, args;
40655 unsigned arity;
40656 const char *bname;
40657 machine_mode el_mode, in_mode;
40658 int n, in_n;
40660 /* The SVML is suitable for unsafe math only. */
40661 if (!flag_unsafe_math_optimizations)
40662 return NULL_TREE;
40664 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40665 n = TYPE_VECTOR_SUBPARTS (type_out);
40666 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40667 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40668 if (el_mode != in_mode
40669 || n != in_n)
40670 return NULL_TREE;
40672 switch (fn)
40674 case BUILT_IN_EXP:
40675 case BUILT_IN_LOG:
40676 case BUILT_IN_LOG10:
40677 case BUILT_IN_POW:
40678 case BUILT_IN_TANH:
40679 case BUILT_IN_TAN:
40680 case BUILT_IN_ATAN:
40681 case BUILT_IN_ATAN2:
40682 case BUILT_IN_ATANH:
40683 case BUILT_IN_CBRT:
40684 case BUILT_IN_SINH:
40685 case BUILT_IN_SIN:
40686 case BUILT_IN_ASINH:
40687 case BUILT_IN_ASIN:
40688 case BUILT_IN_COSH:
40689 case BUILT_IN_COS:
40690 case BUILT_IN_ACOSH:
40691 case BUILT_IN_ACOS:
40692 if (el_mode != DFmode || n != 2)
40693 return NULL_TREE;
40694 break;
40696 case BUILT_IN_EXPF:
40697 case BUILT_IN_LOGF:
40698 case BUILT_IN_LOG10F:
40699 case BUILT_IN_POWF:
40700 case BUILT_IN_TANHF:
40701 case BUILT_IN_TANF:
40702 case BUILT_IN_ATANF:
40703 case BUILT_IN_ATAN2F:
40704 case BUILT_IN_ATANHF:
40705 case BUILT_IN_CBRTF:
40706 case BUILT_IN_SINHF:
40707 case BUILT_IN_SINF:
40708 case BUILT_IN_ASINHF:
40709 case BUILT_IN_ASINF:
40710 case BUILT_IN_COSHF:
40711 case BUILT_IN_COSF:
40712 case BUILT_IN_ACOSHF:
40713 case BUILT_IN_ACOSF:
40714 if (el_mode != SFmode || n != 4)
40715 return NULL_TREE;
40716 break;
40718 default:
40719 return NULL_TREE;
40722 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40724 if (fn == BUILT_IN_LOGF)
40725 strcpy (name, "vmlsLn4");
40726 else if (fn == BUILT_IN_LOG)
40727 strcpy (name, "vmldLn2");
40728 else if (n == 4)
40730 sprintf (name, "vmls%s", bname+10);
40731 name[strlen (name)-1] = '4';
40733 else
40734 sprintf (name, "vmld%s2", bname+10);
40736 /* Convert to uppercase. */
40737 name[4] &= ~0x20;
40739 arity = 0;
40740 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40741 args;
40742 args = TREE_CHAIN (args))
40743 arity++;
40745 if (arity == 1)
40746 fntype = build_function_type_list (type_out, type_in, NULL);
40747 else
40748 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40750 /* Build a function declaration for the vectorized function. */
40751 new_fndecl = build_decl (BUILTINS_LOCATION,
40752 FUNCTION_DECL, get_identifier (name), fntype);
40753 TREE_PUBLIC (new_fndecl) = 1;
40754 DECL_EXTERNAL (new_fndecl) = 1;
40755 DECL_IS_NOVOPS (new_fndecl) = 1;
40756 TREE_READONLY (new_fndecl) = 1;
40758 return new_fndecl;
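/* Worked example of the name mangling above: for BUILT_IN_SINF with
   n == 4, bname is "__builtin_sinf", so bname+10 is "sinf"; sprintf
   produces "vmlssinf", the trailing character is overwritten to give
   "vmlssin4", and the uppercase fixup yields "vmlsSin4". Likewise
   BUILT_IN_SIN with n == 2 produces "vmldSin2". */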
40761 /* Handler for an ACML-style interface to
40762 a library with vectorized intrinsics. */
40764 static tree
40765 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
40767 char name[20] = "__vr.._";
40768 tree fntype, new_fndecl, args;
40769 unsigned arity;
40770 const char *bname;
40771 machine_mode el_mode, in_mode;
40772 int n, in_n;
40774 /* The ACML is 64-bit only and suitable for unsafe math only, as
40775 it does not correctly support parts of IEEE arithmetic with the
40776 required precision, such as denormals. */
40777 if (!TARGET_64BIT
40778 || !flag_unsafe_math_optimizations)
40779 return NULL_TREE;
40781 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40782 n = TYPE_VECTOR_SUBPARTS (type_out);
40783 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40784 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40785 if (el_mode != in_mode
40786 || n != in_n)
40787 return NULL_TREE;
40789 switch (fn)
40791 case BUILT_IN_SIN:
40792 case BUILT_IN_COS:
40793 case BUILT_IN_EXP:
40794 case BUILT_IN_LOG:
40795 case BUILT_IN_LOG2:
40796 case BUILT_IN_LOG10:
40797 name[4] = 'd';
40798 name[5] = '2';
40799 if (el_mode != DFmode
40800 || n != 2)
40801 return NULL_TREE;
40802 break;
40804 case BUILT_IN_SINF:
40805 case BUILT_IN_COSF:
40806 case BUILT_IN_EXPF:
40807 case BUILT_IN_POWF:
40808 case BUILT_IN_LOGF:
40809 case BUILT_IN_LOG2F:
40810 case BUILT_IN_LOG10F:
40811 name[4] = 's';
40812 name[5] = '4';
40813 if (el_mode != SFmode
40814 || n != 4)
40815 return NULL_TREE;
40816 break;
40818 default:
40819 return NULL_TREE;
40822 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40823 sprintf (name + 7, "%s", bname+10);
40825 arity = 0;
40826 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40827 args;
40828 args = TREE_CHAIN (args))
40829 arity++;
40831 if (arity == 1)
40832 fntype = build_function_type_list (type_out, type_in, NULL);
40833 else
40834 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40836 /* Build a function declaration for the vectorized function. */
40837 new_fndecl = build_decl (BUILTINS_LOCATION,
40838 FUNCTION_DECL, get_identifier (name), fntype);
40839 TREE_PUBLIC (new_fndecl) = 1;
40840 DECL_EXTERNAL (new_fndecl) = 1;
40841 DECL_IS_NOVOPS (new_fndecl) = 1;
40842 TREE_READONLY (new_fndecl) = 1;
40844 return new_fndecl;
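/* Worked example: for BUILT_IN_LOG with n == 2 the template "__vr.._"
   becomes "__vrd2_" and the suffix "log" is appended, giving
   "__vrd2_log"; BUILT_IN_SINF with n == 4 gives "__vrs4_sinf". */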
40847 /* Returns a decl of a function that implements a gather load with
40848 memory vector type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
40849 Return NULL_TREE if it is not available. */
40851 static tree
40852 ix86_vectorize_builtin_gather (const_tree mem_vectype,
40853 const_tree index_type, int scale)
40855 bool si;
40856 enum ix86_builtins code;
40858 if (! TARGET_AVX2)
40859 return NULL_TREE;
40861 if ((TREE_CODE (index_type) != INTEGER_TYPE
40862 && !POINTER_TYPE_P (index_type))
40863 || (TYPE_MODE (index_type) != SImode
40864 && TYPE_MODE (index_type) != DImode))
40865 return NULL_TREE;
40867 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
40868 return NULL_TREE;
40870 /* v*gather* insn sign extends index to pointer mode. */
40871 if (TYPE_PRECISION (index_type) < POINTER_SIZE
40872 && TYPE_UNSIGNED (index_type))
40873 return NULL_TREE;
40875 if (scale <= 0
40876 || scale > 8
40877 || (scale & (scale - 1)) != 0)
40878 return NULL_TREE;
40880 si = TYPE_MODE (index_type) == SImode;
40881 switch (TYPE_MODE (mem_vectype))
40883 case V2DFmode:
40884 if (TARGET_AVX512VL)
40885 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
40886 else
40887 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
40888 break;
40889 case V4DFmode:
40890 if (TARGET_AVX512VL)
40891 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
40892 else
40893 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
40894 break;
40895 case V2DImode:
40896 if (TARGET_AVX512VL)
40897 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
40898 else
40899 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
40900 break;
40901 case V4DImode:
40902 if (TARGET_AVX512VL)
40903 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
40904 else
40905 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
40906 break;
40907 case V4SFmode:
40908 if (TARGET_AVX512VL)
40909 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
40910 else
40911 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
40912 break;
40913 case V8SFmode:
40914 if (TARGET_AVX512VL)
40915 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
40916 else
40917 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
40918 break;
40919 case V4SImode:
40920 if (TARGET_AVX512VL)
40921 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
40922 else
40923 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
40924 break;
40925 case V8SImode:
40926 if (TARGET_AVX512VL)
40927 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
40928 else
40929 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
40930 break;
40931 case V8DFmode:
40932 if (TARGET_AVX512F)
40933 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
40934 else
40935 return NULL_TREE;
40936 break;
40937 case V8DImode:
40938 if (TARGET_AVX512F)
40939 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
40940 else
40941 return NULL_TREE;
40942 break;
40943 case V16SFmode:
40944 if (TARGET_AVX512F)
40945 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
40946 else
40947 return NULL_TREE;
40948 break;
40949 case V16SImode:
40950 if (TARGET_AVX512F)
40951 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
40952 else
40953 return NULL_TREE;
40954 break;
40955 default:
40956 return NULL_TREE;
40959 return ix86_get_builtin (code);
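/* For example, gathering V4DF elements with a V4SI (SImode) index on
   plain AVX2 selects IX86_BUILTIN_GATHERALTSIV4DF, while with
   AVX-512VL the GATHER3 variant is chosen instead; the scale check
   above only accepts the hardware-supported scales 1, 2, 4 and 8. */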
40962 /* Returns a decl of a target-specific builtin that implements the
40963 reciprocal of the function FN, or NULL_TREE if not available. */
40965 static tree
40966 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
40968 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
40969 && flag_finite_math_only && !flag_trapping_math
40970 && flag_unsafe_math_optimizations))
40971 return NULL_TREE;
40973 if (md_fn)
40974 /* Machine dependent builtins. */
40975 switch (fn)
40977 /* Vectorized version of sqrt to rsqrt conversion. */
40978 case IX86_BUILTIN_SQRTPS_NR:
40979 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
40981 case IX86_BUILTIN_SQRTPS_NR256:
40982 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
40984 default:
40985 return NULL_TREE;
40987 else
40988 /* Normal builtins. */
40989 switch (fn)
40991 /* Sqrt to rsqrt conversion. */
40992 case BUILT_IN_SQRTF:
40993 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
40995 default:
40996 return NULL_TREE;
41000 /* Helper for avx_vpermilps256_operand et al. This is also used by
41001 the expansion functions to turn the parallel back into a mask.
41002 The return value is 0 for no match and the imm8+1 for a match. */
41004 int
41005 avx_vpermilp_parallel (rtx par, machine_mode mode)
41007 unsigned i, nelt = GET_MODE_NUNITS (mode);
41008 unsigned mask = 0;
41009 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
41011 if (XVECLEN (par, 0) != (int) nelt)
41012 return 0;
41014 /* Validate that all of the elements are constants, and not totally
41015 out of range. Copy the data into an integral array to make the
41016 subsequent checks easier. */
41017 for (i = 0; i < nelt; ++i)
41019 rtx er = XVECEXP (par, 0, i);
41020 unsigned HOST_WIDE_INT ei;
41022 if (!CONST_INT_P (er))
41023 return 0;
41024 ei = INTVAL (er);
41025 if (ei >= nelt)
41026 return 0;
41027 ipar[i] = ei;
41030 switch (mode)
41032 case V8DFmode:
41033 /* In the 512-bit DFmode case, we can only move elements within
41034 a 128-bit lane. First fill the second part of the mask,
41035 then fallthru. */
41036 for (i = 4; i < 6; ++i)
41038 if (ipar[i] < 4 || ipar[i] >= 6)
41039 return 0;
41040 mask |= (ipar[i] - 4) << i;
41042 for (i = 6; i < 8; ++i)
41044 if (ipar[i] < 6)
41045 return 0;
41046 mask |= (ipar[i] - 6) << i;
41048 /* FALLTHRU */
41050 case V4DFmode:
41051 /* In the 256-bit DFmode case, we can only move elements within
41052 a 128-bit lane. */
41053 for (i = 0; i < 2; ++i)
41055 if (ipar[i] >= 2)
41056 return 0;
41057 mask |= ipar[i] << i;
41059 for (i = 2; i < 4; ++i)
41061 if (ipar[i] < 2)
41062 return 0;
41063 mask |= (ipar[i] - 2) << i;
41065 break;
41067 case V16SFmode:
41068 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
41069 must mirror the permutation in the lower 256 bits. */
41070 for (i = 0; i < 8; ++i)
41071 if (ipar[i] + 8 != ipar[i + 8])
41072 return 0;
41073 /* FALLTHRU */
41075 case V8SFmode:
41076 /* In the 256-bit SFmode case, we have full freedom of
41077 movement within the low 128-bit lane, but the high 128-bit
41078 lane must mirror the exact same pattern. */
41079 for (i = 0; i < 4; ++i)
41080 if (ipar[i] + 4 != ipar[i + 4])
41081 return 0;
41082 nelt = 4;
41083 /* FALLTHRU */
41085 case V2DFmode:
41086 case V4SFmode:
41087 /* In the 128-bit case, we have full freedom in the placement of
41088 the elements from the source operand. */
41089 for (i = 0; i < nelt; ++i)
41090 mask |= ipar[i] << (i * (nelt / 2));
41091 break;
41093 default:
41094 gcc_unreachable ();
41097 /* Make sure success has a non-zero value by adding one. */
41098 return mask + 1;
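/* Worked example: for V4SFmode the identity selector (0, 1, 2, 3)
   packs as mask = 0 | 1<<2 | 2<<4 | 3<<6 = 0xe4, so the function
   returns 0xe5; a selector that violates the per-mode lane
   restrictions above returns 0. */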
41101 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
41102 the expansion functions to turn the parallel back into a mask.
41103 The return value is 0 for no match and the imm8+1 for a match. */
41105 int
41106 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41108 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41109 unsigned mask = 0;
41110 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41112 if (XVECLEN (par, 0) != (int) nelt)
41113 return 0;
41115 /* Validate that all of the elements are constants, and not totally
41116 out of range. Copy the data into an integral array to make the
41117 subsequent checks easier. */
41118 for (i = 0; i < nelt; ++i)
41120 rtx er = XVECEXP (par, 0, i);
41121 unsigned HOST_WIDE_INT ei;
41123 if (!CONST_INT_P (er))
41124 return 0;
41125 ei = INTVAL (er);
41126 if (ei >= 2 * nelt)
41127 return 0;
41128 ipar[i] = ei;
41131 /* Validate that the halves of the permute are halves. */
41132 for (i = 0; i < nelt2 - 1; ++i)
41133 if (ipar[i] + 1 != ipar[i + 1])
41134 return 0;
41135 for (i = nelt2; i < nelt - 1; ++i)
41136 if (ipar[i] + 1 != ipar[i + 1])
41137 return 0;
41139 /* Reconstruct the mask. */
41140 for (i = 0; i < 2; ++i)
41142 unsigned e = ipar[i * nelt2];
41143 if (e % nelt2)
41144 return 0;
41145 e /= nelt2;
41146 mask |= e << (i * 4);
41149 /* Make sure success has a non-zero value by adding one. */
41150 return mask + 1;
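/* Worked example: for V4DFmode the parallel (2, 3, 4, 5), i.e. the
   high lane of the first operand followed by the low lane of the
   second, reconstructs the imm8 0x21 and the function returns 0x22. */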
41153 /* Return a register priority for hard reg REGNO. */
41154 static int
41155 ix86_register_priority (int hard_regno)
41157 /* ebp and r13 as the base always want a displacement, r12 as the
41158 base always wants an index. So discourage their usage in an
41159 address. */
41160 if (hard_regno == R12_REG || hard_regno == R13_REG)
41161 return 0;
41162 if (hard_regno == BP_REG)
41163 return 1;
41164 /* New x86-64 int registers result in bigger code size. Discourage
41165 them. */
41166 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41167 return 2;
41168 /* New x86-64 SSE registers result in bigger code size. Discourage
41169 them. */
41170 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41171 return 2;
41172 /* Usage of AX register results in smaller code. Prefer it. */
41173 if (hard_regno == AX_REG)
41174 return 4;
41175 return 3;
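/* The resulting priorities are therefore: 0 for r12/r13, 1 for ebp,
   2 for the remaining REX-only integer and SSE registers (presumably
   r8-r15 and xmm8-xmm15), 4 for eax, and 3 for everything else;
   higher values are preferred by the allocator. */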
41178 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41180 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41181 QImode must go into class Q_REGS.
41182 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
41183 movdf to do mem-to-mem moves through integer regs. */
41185 static reg_class_t
41186 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41188 machine_mode mode = GET_MODE (x);
41190 /* We're only allowed to return a subclass of CLASS. Many of the
41191 following checks fail for NO_REGS, so eliminate that early. */
41192 if (regclass == NO_REGS)
41193 return NO_REGS;
41195 /* All classes can load zeros. */
41196 if (x == CONST0_RTX (mode))
41197 return regclass;
41199 /* Force constants into memory if we are loading a (nonzero) constant into
41200 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41201 instructions to load from a constant. */
41202 if (CONSTANT_P (x)
41203 && (MAYBE_MMX_CLASS_P (regclass)
41204 || MAYBE_SSE_CLASS_P (regclass)
41205 || MAYBE_MASK_CLASS_P (regclass)))
41206 return NO_REGS;
41208 /* Prefer SSE regs only, if we can use them for math. */
41209 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41210 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41212 /* Floating-point constants need more complex checks. */
41213 if (CONST_DOUBLE_P (x))
41215 /* General regs can load everything. */
41216 if (reg_class_subset_p (regclass, GENERAL_REGS))
41217 return regclass;
41219 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41220 zero above. We only want to wind up preferring 80387 registers if
41221 we plan on doing computation with them. */
41222 if (TARGET_80387
41223 && standard_80387_constant_p (x) > 0)
41225 /* Limit class to non-sse. */
41226 if (regclass == FLOAT_SSE_REGS)
41227 return FLOAT_REGS;
41228 if (regclass == FP_TOP_SSE_REGS)
41229 return FP_TOP_REG;
41230 if (regclass == FP_SECOND_SSE_REGS)
41231 return FP_SECOND_REG;
41232 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41233 return regclass;
41236 return NO_REGS;
41239 /* Generally when we see PLUS here, it's the function invariant
41240 (plus soft-fp const_int), which can only be computed into general
41241 regs. */
41242 if (GET_CODE (x) == PLUS)
41243 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41245 /* QImode constants are easy to load, but non-constant QImode data
41246 must go into Q_REGS. */
41247 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41249 if (reg_class_subset_p (regclass, Q_REGS))
41250 return regclass;
41251 if (reg_class_subset_p (Q_REGS, regclass))
41252 return Q_REGS;
41253 return NO_REGS;
41256 return regclass;
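/* For example, loading a nonzero vector or floating-point constant
   into an SSE class returns NO_REGS above, which pushes the constant
   into the constant pool so it is loaded from memory instead. */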
41259 /* Discourage putting floating-point values in SSE registers unless
41260 SSE math is being used, and likewise for the 387 registers. */
41261 static reg_class_t
41262 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41264 machine_mode mode = GET_MODE (x);
41266 /* Restrict the output reload class to the register bank that we are doing
41267 math on. If we would like not to return a subset of CLASS, reject this
41268 alternative: if reload cannot do this, it will still use its choice. */
41269 mode = GET_MODE (x);
41270 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41271 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41273 if (X87_FLOAT_MODE_P (mode))
41275 if (regclass == FP_TOP_SSE_REGS)
41276 return FP_TOP_REG;
41277 else if (regclass == FP_SECOND_SSE_REGS)
41278 return FP_SECOND_REG;
41279 else
41280 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41283 return regclass;
41286 static reg_class_t
41287 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41288 machine_mode mode, secondary_reload_info *sri)
41290 /* Double-word spills from general registers to non-offsettable memory
41291 references (zero-extended addresses) require special handling. */
41292 if (TARGET_64BIT
41293 && MEM_P (x)
41294 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41295 && INTEGER_CLASS_P (rclass)
41296 && !offsettable_memref_p (x))
41298 sri->icode = (in_p
41299 ? CODE_FOR_reload_noff_load
41300 : CODE_FOR_reload_noff_store);
41301 /* Add the cost of moving address to a temporary. */
41302 sri->extra_cost = 1;
41304 return NO_REGS;
41307 /* QImode spills from non-QI registers require an
41308 intermediate register on 32-bit targets. */
41309 if (mode == QImode
41310 && (MAYBE_MASK_CLASS_P (rclass)
41311 || (!TARGET_64BIT && !in_p
41312 && INTEGER_CLASS_P (rclass)
41313 && MAYBE_NON_Q_CLASS_P (rclass))))
41315 int regno;
41317 if (REG_P (x))
41318 regno = REGNO (x);
41319 else
41320 regno = -1;
41322 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41323 regno = true_regnum (x);
41325 /* Return Q_REGS if the operand is in memory. */
41326 if (regno == -1)
41327 return Q_REGS;
41330 /* This condition handles corner case where an expression involving
41331 pointers gets vectorized. We're trying to use the address of a
41332 stack slot as a vector initializer.
41334 (set (reg:V2DI 74 [ vect_cst_.2 ])
41335 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41337 Eventually frame gets turned into sp+offset like this:
41339 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41340 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41341 (const_int 392 [0x188]))))
41343 That later gets turned into:
41345 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41346 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41347 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41349 We'll have the following reload recorded:
41351 Reload 0: reload_in (DI) =
41352 (plus:DI (reg/f:DI 7 sp)
41353 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41354 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41355 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41356 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41357 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41358 reload_reg_rtx: (reg:V2DI 22 xmm1)
41360 Which isn't going to work since SSE instructions can't handle scalar
41361 additions. Returning GENERAL_REGS forces the addition into integer
41362 register and reload can handle subsequent reloads without problems. */
41364 if (in_p && GET_CODE (x) == PLUS
41365 && SSE_CLASS_P (rclass)
41366 && SCALAR_INT_MODE_P (mode))
41367 return GENERAL_REGS;
41369 return NO_REGS;
41372 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41374 static bool
41375 ix86_class_likely_spilled_p (reg_class_t rclass)
41377 switch (rclass)
41379 case AREG:
41380 case DREG:
41381 case CREG:
41382 case BREG:
41383 case AD_REGS:
41384 case SIREG:
41385 case DIREG:
41386 case SSE_FIRST_REG:
41387 case FP_TOP_REG:
41388 case FP_SECOND_REG:
41389 case BND_REGS:
41390 return true;
41392 default:
41393 break;
41396 return false;
41399 /* If we are copying between general and FP registers, we need a memory
41400 location. The same is true for SSE and MMX registers.
41402 To optimize register_move_cost performance, allow inline variant.
41404 The macro can't work reliably when one of the CLASSES is a class containing
41405 registers from multiple units (SSE, MMX, integer). We avoid this by never
41406 combining those units in a single alternative in the machine description.
41407 Ensure that this constraint holds to avoid unexpected surprises.
41409 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41410 enforce these sanity checks. */
41412 static inline bool
41413 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41414 machine_mode mode, int strict)
41416 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41417 return false;
41418 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41419 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41420 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41421 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41422 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41423 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41425 gcc_assert (!strict || lra_in_progress);
41426 return true;
41429 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41430 return true;
41432 /* Between mask and general, we have moves no larger than word size. */
41433 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41434 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41435 return true;
41437 /* ??? This is a lie. We do have moves between mmx/general, and for
41438 mmx/sse2. But by saying we need secondary memory we discourage the
41439 register allocator from using the mmx registers unless needed. */
41440 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41441 return true;
41443 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41445 /* SSE1 doesn't have any direct moves from other classes. */
41446 if (!TARGET_SSE2)
41447 return true;
41449 /* If the target says that inter-unit moves are more expensive
41450 than moving through memory, then don't generate them. */
41451 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41452 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41453 return true;
41455 /* Between SSE and general, we have moves no larger than word size. */
41456 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41457 return true;
41460 return false;
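/* For example, on a 32-bit target (UNITS_PER_WORD == 4) a DImode move
   between SSE_REGS and GENERAL_REGS needs secondary memory, as does
   any move between MMX and non-MMX classes. */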
41463 bool
41464 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41465 machine_mode mode, int strict)
41467 return inline_secondary_memory_needed (class1, class2, mode, strict);
41470 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41472 On the 80386, this is the size of MODE in words,
41473 except in the FP regs, where a single reg is always enough. */
41475 static unsigned char
41476 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41478 if (MAYBE_INTEGER_CLASS_P (rclass))
41480 if (mode == XFmode)
41481 return (TARGET_64BIT ? 2 : 3);
41482 else if (mode == XCmode)
41483 return (TARGET_64BIT ? 4 : 6);
41484 else
41485 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41487 else
41489 if (COMPLEX_MODE_P (mode))
41490 return 2;
41491 else
41492 return 1;
41496 /* Return true if the registers in CLASS cannot represent the change from
41497 modes FROM to TO. */
41499 bool
41500 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41501 enum reg_class regclass)
41503 if (from == to)
41504 return false;
41506 /* x87 registers can't do subreg at all, as all values are reformatted
41507 to extended precision. */
41508 if (MAYBE_FLOAT_CLASS_P (regclass))
41509 return true;
41511 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41513 /* Vector registers do not support QI or HImode loads. If we don't
41514 disallow a change to these modes, reload will assume it's ok to
41515 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41516 the vec_dupv4hi pattern. */
41517 if (GET_MODE_SIZE (from) < 4)
41518 return true;
41521 return false;
41524 /* Return the cost of moving data of mode M between a
41525 register and memory. A value of 2 is the default; this cost is
41526 relative to those in `REGISTER_MOVE_COST'.
41528 This function is used extensively by register_move_cost that is used to
41529 build tables at startup. Make it inline in this case.
41530 When IN is 2, return maximum of in and out move cost.
41532 If moving between registers and memory is more expensive than
41533 between two registers, you should define this macro to express the
41534 relative cost.
41536 Also model the increased cost of moving QImode registers in
41537 non-Q_REGS classes. */
41539 static inline int
41540 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41541 int in)
41543 int cost;
41544 if (FLOAT_CLASS_P (regclass))
41546 int index;
41547 switch (mode)
41549 case SFmode:
41550 index = 0;
41551 break;
41552 case DFmode:
41553 index = 1;
41554 break;
41555 case XFmode:
41556 index = 2;
41557 break;
41558 default:
41559 return 100;
41561 if (in == 2)
41562 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41563 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41565 if (SSE_CLASS_P (regclass))
41567 int index;
41568 switch (GET_MODE_SIZE (mode))
41570 case 4:
41571 index = 0;
41572 break;
41573 case 8:
41574 index = 1;
41575 break;
41576 case 16:
41577 index = 2;
41578 break;
41579 default:
41580 return 100;
41582 if (in == 2)
41583 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41584 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41586 if (MMX_CLASS_P (regclass))
41588 int index;
41589 switch (GET_MODE_SIZE (mode))
41591 case 4:
41592 index = 0;
41593 break;
41594 case 8:
41595 index = 1;
41596 break;
41597 default:
41598 return 100;
41600 if (in == 2)
41601 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41602 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41604 switch (GET_MODE_SIZE (mode))
41606 case 1:
41607 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41609 if (!in)
41610 return ix86_cost->int_store[0];
41611 if (TARGET_PARTIAL_REG_DEPENDENCY
41612 && optimize_function_for_speed_p (cfun))
41613 cost = ix86_cost->movzbl_load;
41614 else
41615 cost = ix86_cost->int_load[0];
41616 if (in == 2)
41617 return MAX (cost, ix86_cost->int_store[0]);
41618 return cost;
41620 else
41622 if (in == 2)
41623 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41624 if (in)
41625 return ix86_cost->movzbl_load;
41626 else
41627 return ix86_cost->int_store[0] + 4;
41629 break;
41630 case 2:
41631 if (in == 2)
41632 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41633 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41634 default:
41635 /* Compute number of 32-bit moves needed. TFmode is moved as XFmode. */
41636 if (mode == TFmode)
41637 mode = XFmode;
41638 if (in == 2)
41639 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41640 else if (in)
41641 cost = ix86_cost->int_load[2];
41642 else
41643 cost = ix86_cost->int_store[2];
41644 return (cost * (((int) GET_MODE_SIZE (mode)
41645 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
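/* Note on the IN parameter as used above: 0 asks for the store cost,
   1 for the load cost, and 2 for the maximum of the two; the
   ix86_memory_move_cost wrapper below only ever passes 0 or 1. */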
41649 static int
41650 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41651 bool in)
41653 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41657 /* Return the cost of moving data from a register in class CLASS1 to
41658 one in class CLASS2.
41660 It is not required that the cost always equal 2 when FROM is the same as TO;
41661 on some machines it is expensive to move between registers if they are not
41662 general registers. */
41664 static int
41665 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41666 reg_class_t class2_i)
41668 enum reg_class class1 = (enum reg_class) class1_i;
41669 enum reg_class class2 = (enum reg_class) class2_i;
41671 /* In case we require secondary memory, compute the cost of the store
41672 followed by the load. In order to avoid bad register allocation choices,
41673 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
41675 if (inline_secondary_memory_needed (class1, class2, mode, 0))
41677 int cost = 1;
41679 cost += inline_memory_move_cost (mode, class1, 2);
41680 cost += inline_memory_move_cost (mode, class2, 2);
41682 /* When copying from a general purpose register we may emit multiple
41683 stores followed by a single load, causing a memory size mismatch stall.
41684 Count this as an arbitrarily high cost of 20. */
41685 if (targetm.class_max_nregs (class1, mode)
41686 > targetm.class_max_nregs (class2, mode))
41687 cost += 20;
41689 /* In the case of FP/MMX moves, the registers actually overlap, and we
41690 have to switch modes in order to treat them differently. */
41691 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41692 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41693 cost += 20;
41695 return cost;
41698 /* Moves between SSE/MMX and integer unit are expensive. */
41699 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41700 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41702 /* ??? By keeping the returned value relatively high, we limit the number
41703 of moves between integer and MMX/SSE registers for all targets.
41704 Additionally, the high value prevents a problem with x86_modes_tieable_p(),
41705 where integer modes in MMX/SSE registers are not tieable
41706 because of missing QImode and HImode moves to, from or between
41707 MMX/SSE registers. */
41708 return MAX (8, ix86_cost->mmxsse_to_integer);
41710 if (MAYBE_FLOAT_CLASS_P (class1))
41711 return ix86_cost->fp_move;
41712 if (MAYBE_SSE_CLASS_P (class1))
41713 return ix86_cost->sse_move;
41714 if (MAYBE_MMX_CLASS_P (class1))
41715 return ix86_cost->mmx_move;
41716 return 2;
41719 /* Return TRUE if hard register REGNO can hold a value of machine-mode
41720 MODE. */
41722 bool
41723 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
41725 /* The flags register, and only the flags register, can hold CCmode values. */
41726 if (CC_REGNO_P (regno))
41727 return GET_MODE_CLASS (mode) == MODE_CC;
41728 if (GET_MODE_CLASS (mode) == MODE_CC
41729 || GET_MODE_CLASS (mode) == MODE_RANDOM
41730 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
41731 return false;
41732 if (STACK_REGNO_P (regno))
41733 return VALID_FP_MODE_P (mode);
41734 if (MASK_REGNO_P (regno))
41735 return (VALID_MASK_REG_MODE (mode)
41736 || (TARGET_AVX512BW
41737 && VALID_MASK_AVX512BW_MODE (mode)));
41738 if (BND_REGNO_P (regno))
41739 return VALID_BND_REG_MODE (mode);
41740 if (SSE_REGNO_P (regno))
41742 /* We implement the move patterns for all vector modes into and
41743 out of SSE registers, even when no operation instructions
41744 are available. */
41746 /* For AVX-512 we allow, regardless of regno:
41747 - XI mode
41748 - any 512-bit wide vector mode
41749 - any scalar mode. */
41750 if (TARGET_AVX512F
41751 && (mode == XImode
41752 || VALID_AVX512F_REG_MODE (mode)
41753 || VALID_AVX512F_SCALAR_MODE (mode)))
41754 return true;
41756 /* TODO check for QI/HI scalars. */
41757 /* AVX-512VL allows SSE registers 16+ (xmm16-xmm31) for 128/256-bit modes. */
41758 if (TARGET_AVX512VL
41759 && (mode == OImode
41760 || mode == TImode
41761 || VALID_AVX256_REG_MODE (mode)
41762 || VALID_AVX512VL_128_REG_MODE (mode)))
41763 return true;
41765 /* xmm16-xmm31 are only available for AVX-512. */
41766 if (EXT_REX_SSE_REGNO_P (regno))
41767 return false;
41769 /* OImode and AVX modes are available only when AVX is enabled. */
41770 return ((TARGET_AVX
41771 && VALID_AVX256_REG_OR_OI_MODE (mode))
41772 || VALID_SSE_REG_MODE (mode)
41773 || VALID_SSE2_REG_MODE (mode)
41774 || VALID_MMX_REG_MODE (mode)
41775 || VALID_MMX_REG_MODE_3DNOW (mode));
41777 if (MMX_REGNO_P (regno))
41779 /* We implement the move patterns for 3DNOW modes even in MMX mode,
41780 so if the register is available at all, then we can move data of
41781 the given mode into or out of it. */
41782 return (VALID_MMX_REG_MODE (mode)
41783 || VALID_MMX_REG_MODE_3DNOW (mode));
41786 if (mode == QImode)
41788 /* Take care for QImode values - they can be in non-QI regs,
41789 but then they do cause partial register stalls. */
41790 if (ANY_QI_REGNO_P (regno))
41791 return true;
41792 if (!TARGET_PARTIAL_REG_STALL)
41793 return true;
41794 /* LRA checks if the hard register is OK for the given mode.
41795 QImode values can live in non-QI regs, so we allow all
41796 registers here. */
41797 if (lra_in_progress)
41798 return true;
41799 return !can_create_pseudo_p ();
41801 /* We handle both integer and floats in the general purpose registers. */
41802 else if (VALID_INT_MODE_P (mode))
41803 return true;
41804 else if (VALID_FP_MODE_P (mode))
41805 return true;
41806 else if (VALID_DFP_MODE_P (mode))
41807 return true;
41808 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
41809 on to use that value in smaller contexts, this can easily force a
41810 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
41811 supporting DImode, allow it. */
41812 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
41813 return true;
41815 return false;
41818 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
41819 tieable integer mode. */
41821 static bool
41822 ix86_tieable_integer_mode_p (machine_mode mode)
41824 switch (mode)
41826 case HImode:
41827 case SImode:
41828 return true;
41830 case QImode:
41831 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
41833 case DImode:
41834 return TARGET_64BIT;
41836 default:
41837 return false;
41841 /* Return true if MODE1 is accessible in a register that can hold MODE2
41842 without copying. That is, all register classes that can hold MODE2
41843 can also hold MODE1. */
41845 bool
41846 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
41848 if (mode1 == mode2)
41849 return true;
41851 if (ix86_tieable_integer_mode_p (mode1)
41852 && ix86_tieable_integer_mode_p (mode2))
41853 return true;
41855 /* MODE2 being XFmode implies fp stack or general regs, which means we
41856 can tie any smaller floating point modes to it. Note that we do not
41857 tie this with TFmode. */
41858 if (mode2 == XFmode)
41859 return mode1 == SFmode || mode1 == DFmode;
41861 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
41862 that we can tie it with SFmode. */
41863 if (mode2 == DFmode)
41864 return mode1 == SFmode;
41866 /* If MODE2 is only appropriate for an SSE register, then tie with
41867 any other mode acceptable to SSE registers. */
41868 if (GET_MODE_SIZE (mode2) == 32
41869 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41870 return (GET_MODE_SIZE (mode1) == 32
41871 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41872 if (GET_MODE_SIZE (mode2) == 16
41873 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41874 return (GET_MODE_SIZE (mode1) == 16
41875 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41877 /* If MODE2 is appropriate for an MMX register, then tie
41878 with any other mode acceptable to MMX registers. */
41879 if (GET_MODE_SIZE (mode2) == 8
41880 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
41881 return (GET_MODE_SIZE (mode1) == 8
41882 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
41884 return false;
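/* For example, SFmode ties with both DFmode and XFmode, since every
   register class that can hold the wider mode can also hold SFmode,
   whereas SImode and DImode tie only when TARGET_64BIT makes DImode a
   tieable integer mode. */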
41887 /* Return the cost of moving between two registers of mode MODE. */
41889 static int
41890 ix86_set_reg_reg_cost (machine_mode mode)
41892 unsigned int units = UNITS_PER_WORD;
41894 switch (GET_MODE_CLASS (mode))
41896 default:
41897 break;
41899 case MODE_CC:
41900 units = GET_MODE_SIZE (CCmode);
41901 break;
41903 case MODE_FLOAT:
41904 if ((TARGET_SSE && mode == TFmode)
41905 || (TARGET_80387 && mode == XFmode)
41906 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
41907 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
41908 units = GET_MODE_SIZE (mode);
41909 break;
41911 case MODE_COMPLEX_FLOAT:
41912 if ((TARGET_SSE && mode == TCmode)
41913 || (TARGET_80387 && mode == XCmode)
41914 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
41915 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
41916 units = GET_MODE_SIZE (mode);
41917 break;
41919 case MODE_VECTOR_INT:
41920 case MODE_VECTOR_FLOAT:
41921 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41922 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41923 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41924 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41925 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
41926 units = GET_MODE_SIZE (mode);
41929 /* Return the cost of moving between two registers of mode MODE,
41930 assuming that the move will be in pieces of at most UNITS bytes. */
41931 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
41934 /* Compute a (partial) cost for rtx X. Return true if the complete
41935 cost has been computed, and false if subexpressions should be
41936 scanned. In either case, *TOTAL contains the cost result. */
41938 static bool
41939 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
41940 bool speed)
41942 rtx mask;
41943 enum rtx_code code = (enum rtx_code) code_i;
41944 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
41945 machine_mode mode = GET_MODE (x);
41946 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
41948 switch (code)
41950 case SET:
41951 if (register_operand (SET_DEST (x), VOIDmode)
41952 && reg_or_0_operand (SET_SRC (x), VOIDmode))
41954 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
41955 return true;
41957 return false;
41959 case CONST_INT:
41960 case CONST:
41961 case LABEL_REF:
41962 case SYMBOL_REF:
41963 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
41964 *total = 3;
41965 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
41966 *total = 2;
41967 else if (flag_pic && SYMBOLIC_CONST (x)
41968 && !(TARGET_64BIT
41969 && (GET_CODE (x) == LABEL_REF
41970 || (GET_CODE (x) == SYMBOL_REF
41971 && SYMBOL_REF_LOCAL_P (x)))))
41972 *total = 1;
41973 else
41974 *total = 0;
41975 return true;
41977 case CONST_WIDE_INT:
41978 *total = 0;
41979 return true;
41981 case CONST_DOUBLE:
41982 switch (standard_80387_constant_p (x))
41984 case 1: /* 0.0 */
41985 *total = 1;
41986 return true;
41987 default: /* Other constants */
41988 *total = 2;
41989 return true;
41990 case 0:
41991 case -1:
41992 break;
41994 if (SSE_FLOAT_MODE_P (mode))
41996 case CONST_VECTOR:
41997 switch (standard_sse_constant_p (x))
41999 case 0:
42000 break;
42001 case 1: /* 0: xor eliminates false dependency */
42002 *total = 0;
42003 return true;
42004 default: /* -1: cmp contains false dependency */
42005 *total = 1;
42006 return true;
42009 /* Fall back to (MEM (SYMBOL_REF)), since that's where
42010 it'll probably end up. Add a penalty for size. */
42011 *total = (COSTS_N_INSNS (1)
42012 + (flag_pic != 0 && !TARGET_64BIT)
42013 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
42014 return true;
42016 case ZERO_EXTEND:
42017 /* The zero extension is often completely free on x86_64, so make
42018 it as cheap as possible. */
42019 if (TARGET_64BIT && mode == DImode
42020 && GET_MODE (XEXP (x, 0)) == SImode)
42021 *total = 1;
42022 else if (TARGET_ZERO_EXTEND_WITH_AND)
42023 *total = cost->add;
42024 else
42025 *total = cost->movzx;
42026 return false;
42028 case SIGN_EXTEND:
42029 *total = cost->movsx;
42030 return false;
42032 case ASHIFT:
42033 if (SCALAR_INT_MODE_P (mode)
42034 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
42035 && CONST_INT_P (XEXP (x, 1)))
42037 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
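/* A left shift by 1 can be done with an add, and shift counts of 2 or 3
   can be done with an lea using a scale of 4 or 8, so prefer those costs
   when they are no worse than the constant-shift cost.  */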
42038 if (value == 1)
42040 *total = cost->add;
42041 return false;
42043 if ((value == 2 || value == 3)
42044 && cost->lea <= cost->shift_const)
42046 *total = cost->lea;
42047 return false;
42050 /* FALLTHRU */
42052 case ROTATE:
42053 case ASHIFTRT:
42054 case LSHIFTRT:
42055 case ROTATERT:
42056 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42058 /* ??? Should be SSE vector operation cost. */
42059 /* At least for published AMD latencies, this really is the same
42060 as the latency for a simple fpu operation like fabs. */
42061 /* V*QImode is emulated with 1-11 insns. */
42062 if (mode == V16QImode || mode == V32QImode)
42064 int count = 11;
42065 if (TARGET_XOP && mode == V16QImode)
42067 /* For XOP we use vpshab, which requires a broadcast of the
42068 value to the variable shift insn. For constants this
42069 means a V16QImode constant in memory; even when we can perform
42070 the shift with one insn, set the cost to prefer paddb. */
42071 if (CONSTANT_P (XEXP (x, 1)))
42073 *total = (cost->fabs
42074 + rtx_cost (XEXP (x, 0), code, 0, speed)
42075 + (speed ? 2 : COSTS_N_BYTES (16)));
42076 return true;
42078 count = 3;
42080 else if (TARGET_SSSE3)
42081 count = 7;
42082 *total = cost->fabs * count;
42084 else
42085 *total = cost->fabs;
42087 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42089 if (CONST_INT_P (XEXP (x, 1)))
42091 if (INTVAL (XEXP (x, 1)) > 32)
42092 *total = cost->shift_const + COSTS_N_INSNS (2);
42093 else
42094 *total = cost->shift_const * 2;
42096 else
42098 if (GET_CODE (XEXP (x, 1)) == AND)
42099 *total = cost->shift_var * 2;
42100 else
42101 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42104 else
42106 if (CONST_INT_P (XEXP (x, 1)))
42107 *total = cost->shift_const;
42108 else if (GET_CODE (XEXP (x, 1)) == SUBREG
42109 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42111 /* The AND that truncates the shift count is free, so return just the variable-shift cost. */
42112 *total = cost->shift_var;
42113 return true;
42115 else
42116 *total = cost->shift_var;
42118 return false;
42120 case FMA:
42122 rtx sub;
42124 gcc_assert (FLOAT_MODE_P (mode));
42125 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42127 /* ??? SSE scalar/vector cost should be used here. */
42128 /* ??? Bald assumption that fma has the same cost as fmul. */
42129 *total = cost->fmul;
42130 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42132 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
42133 sub = XEXP (x, 0);
42134 if (GET_CODE (sub) == NEG)
42135 sub = XEXP (sub, 0);
42136 *total += rtx_cost (sub, FMA, 0, speed);
42138 sub = XEXP (x, 2);
42139 if (GET_CODE (sub) == NEG)
42140 sub = XEXP (sub, 0);
42141 *total += rtx_cost (sub, FMA, 2, speed);
42142 return true;
42145 case MULT:
42146 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42148 /* ??? SSE scalar cost should be used here. */
42149 *total = cost->fmul;
42150 return false;
42152 else if (X87_FLOAT_MODE_P (mode))
42154 *total = cost->fmul;
42155 return false;
42157 else if (FLOAT_MODE_P (mode))
42159 /* ??? SSE vector cost should be used here. */
42160 *total = cost->fmul;
42161 return false;
42163 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42165 /* V*QImode is emulated with 7-13 insns. */
42166 if (mode == V16QImode || mode == V32QImode)
42168 int extra = 11;
42169 if (TARGET_XOP && mode == V16QImode)
42170 extra = 5;
42171 else if (TARGET_SSSE3)
42172 extra = 6;
42173 *total = cost->fmul * 2 + cost->fabs * extra;
42175 /* V*DImode is emulated with 5-8 insns. */
42176 else if (mode == V2DImode || mode == V4DImode)
42178 if (TARGET_XOP && mode == V2DImode)
42179 *total = cost->fmul * 2 + cost->fabs * 3;
42180 else
42181 *total = cost->fmul * 3 + cost->fabs * 5;
42183 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42184 insns, including two PMULUDQ. */
42185 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42186 *total = cost->fmul * 2 + cost->fabs * 5;
42187 else
42188 *total = cost->fmul;
42189 return false;
42191 else
42193 rtx op0 = XEXP (x, 0);
42194 rtx op1 = XEXP (x, 1);
42195 int nbits;
42196 if (CONST_INT_P (XEXP (x, 1)))
42198 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
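/* Count the set bits in the constant multiplier; each one contributes
   mult_bit to the cost computed below.  */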
42199 for (nbits = 0; value != 0; value &= value - 1)
42200 nbits++;
42202 else
42203 /* This is arbitrary. */
42204 nbits = 7;
42206 /* Compute costs correctly for widening multiplication. */
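/* E.g. (mult:DI (sign_extend:DI (reg:SI)) (sign_extend:DI (reg:SI)))
   is really a 32x32->64 bit widening multiply, so cost it as an SImode
   multiply rather than a DImode one.  */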
42207 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42208 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42209 == GET_MODE_SIZE (mode))
42211 int is_mulwiden = 0;
42212 machine_mode inner_mode = GET_MODE (op0);
42214 if (GET_CODE (op0) == GET_CODE (op1))
42215 is_mulwiden = 1, op1 = XEXP (op1, 0);
42216 else if (CONST_INT_P (op1))
42218 if (GET_CODE (op0) == SIGN_EXTEND)
42219 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42220 == INTVAL (op1);
42221 else
42222 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42225 if (is_mulwiden)
42226 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42229 *total = (cost->mult_init[MODE_INDEX (mode)]
42230 + nbits * cost->mult_bit
42231 + rtx_cost (op0, outer_code, opno, speed)
42232 + rtx_cost (op1, outer_code, opno, speed));
42234 return true;
42237 case DIV:
42238 case UDIV:
42239 case MOD:
42240 case UMOD:
42241 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42242 /* ??? SSE cost should be used here. */
42243 *total = cost->fdiv;
42244 else if (X87_FLOAT_MODE_P (mode))
42245 *total = cost->fdiv;
42246 else if (FLOAT_MODE_P (mode))
42247 /* ??? SSE vector cost should be used here. */
42248 *total = cost->fdiv;
42249 else
42250 *total = cost->divide[MODE_INDEX (mode)];
42251 return false;
42253 case PLUS:
42254 if (GET_MODE_CLASS (mode) == MODE_INT
42255 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
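/* Recognize the base + index*scale + displacement shapes that a single
   lea instruction can compute.  */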
42257 if (GET_CODE (XEXP (x, 0)) == PLUS
42258 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42259 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42260 && CONSTANT_P (XEXP (x, 1)))
42262 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42263 if (val == 2 || val == 4 || val == 8)
42265 *total = cost->lea;
42266 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42267 outer_code, opno, speed);
42268 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42269 outer_code, opno, speed);
42270 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42271 return true;
42274 else if (GET_CODE (XEXP (x, 0)) == MULT
42275 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42277 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42278 if (val == 2 || val == 4 || val == 8)
42280 *total = cost->lea;
42281 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42282 outer_code, opno, speed);
42283 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42284 return true;
42287 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42289 *total = cost->lea;
42290 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42291 outer_code, opno, speed);
42292 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42293 outer_code, opno, speed);
42294 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42295 return true;
42298 /* FALLTHRU */
42300 case MINUS:
42301 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42303 /* ??? SSE cost should be used here. */
42304 *total = cost->fadd;
42305 return false;
42307 else if (X87_FLOAT_MODE_P (mode))
42309 *total = cost->fadd;
42310 return false;
42312 else if (FLOAT_MODE_P (mode))
42314 /* ??? SSE vector cost should be used here. */
42315 *total = cost->fadd;
42316 return false;
42318 /* FALLTHRU */
42320 case AND:
42321 case IOR:
42322 case XOR:
42323 if (GET_MODE_CLASS (mode) == MODE_INT
42324 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42326 *total = (cost->add * 2
42327 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42328 << (GET_MODE (XEXP (x, 0)) != DImode))
42329 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42330 << (GET_MODE (XEXP (x, 1)) != DImode)));
42331 return true;
42333 /* FALLTHRU */
42335 case NEG:
42336 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42338 /* ??? SSE cost should be used here. */
42339 *total = cost->fchs;
42340 return false;
42342 else if (X87_FLOAT_MODE_P (mode))
42344 *total = cost->fchs;
42345 return false;
42347 else if (FLOAT_MODE_P (mode))
42349 /* ??? SSE vector cost should be used here. */
42350 *total = cost->fchs;
42351 return false;
42353 /* FALLTHRU */
42355 case NOT:
42356 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42358 /* ??? Should be SSE vector operation cost. */
42359 /* At least for published AMD latencies, this really is the same
42360 as the latency for a simple fpu operation like fabs. */
42361 *total = cost->fabs;
42363 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42364 *total = cost->add * 2;
42365 else
42366 *total = cost->add;
42367 return false;
42369 case COMPARE:
42370 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42371 && XEXP (XEXP (x, 0), 1) == const1_rtx
42372 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42373 && XEXP (x, 1) == const0_rtx)
42375 /* This kind of construct is implemented using test[bwl].
42376 Treat it as if we had an AND. */
42377 *total = (cost->add
42378 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42379 + rtx_cost (const1_rtx, outer_code, opno, speed));
42380 return true;
42382 return false;
42384 case FLOAT_EXTEND:
42385 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42386 *total = 0;
42387 return false;
42389 case ABS:
42390 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42391 /* ??? SSE cost should be used here. */
42392 *total = cost->fabs;
42393 else if (X87_FLOAT_MODE_P (mode))
42394 *total = cost->fabs;
42395 else if (FLOAT_MODE_P (mode))
42396 /* ??? SSE vector cost should be used here. */
42397 *total = cost->fabs;
42398 return false;
42400 case SQRT:
42401 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42402 /* ??? SSE cost should be used here. */
42403 *total = cost->fsqrt;
42404 else if (X87_FLOAT_MODE_P (mode))
42405 *total = cost->fsqrt;
42406 else if (FLOAT_MODE_P (mode))
42407 /* ??? SSE vector cost should be used here. */
42408 *total = cost->fsqrt;
42409 return false;
42411 case UNSPEC:
42412 if (XINT (x, 1) == UNSPEC_TP)
42413 *total = 0;
42414 return false;
42416 case VEC_SELECT:
42417 case VEC_CONCAT:
42418 case VEC_DUPLICATE:
42419 /* ??? Assume all of these vector manipulation patterns are
42420 recognizable, in which case they all have pretty much the
42421 same cost. */
42422 *total = cost->fabs;
42423 return true;
42424 case VEC_MERGE:
42425 mask = XEXP (x, 2);
42426 /* This is a masked instruction; assume the same cost
42427 as the nonmasked variant. */
42428 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42429 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42430 else
42431 *total = cost->fabs;
42432 return true;
42434 default:
42435 return false;
42439 #if TARGET_MACHO
42441 static int current_machopic_label_num;
42443 /* Given a symbol name and its associated stub, write out the
42444 definition of the stub. */
42446 void
42447 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42449 unsigned int length;
42450 char *binder_name, *symbol_name, lazy_ptr_name[32];
42451 int label = ++current_machopic_label_num;
42453 /* For 64-bit we shouldn't get here. */
42454 gcc_assert (!TARGET_64BIT);
42456 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42457 symb = targetm.strip_name_encoding (symb);
42459 length = strlen (stub);
42460 binder_name = XALLOCAVEC (char, length + 32);
42461 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42463 length = strlen (symb);
42464 symbol_name = XALLOCAVEC (char, length + 32);
42465 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42467 sprintf (lazy_ptr_name, "L%d$lz", label);
42469 if (MACHOPIC_ATT_STUB)
42470 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42471 else if (MACHOPIC_PURE)
42472 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42473 else
42474 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42476 fprintf (file, "%s:\n", stub);
42477 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42479 if (MACHOPIC_ATT_STUB)
42481 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42483 else if (MACHOPIC_PURE)
42485 /* PIC stub. */
42486 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42487 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42488 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42489 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42490 label, lazy_ptr_name, label);
42491 fprintf (file, "\tjmp\t*%%ecx\n");
42493 else
42494 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42496 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42497 it needs no stub-binding-helper. */
42498 if (MACHOPIC_ATT_STUB)
42499 return;
42501 fprintf (file, "%s:\n", binder_name);
42503 if (MACHOPIC_PURE)
42505 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42506 fprintf (file, "\tpushl\t%%ecx\n");
42508 else
42509 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42511 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42513 /* N.B. Keep the correspondence of these
42514 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42515 old-pic/new-pic/non-pic stubs; altering this will break
42516 compatibility with existing dylibs. */
42517 if (MACHOPIC_PURE)
42519 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42520 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42522 else
42523 /* 16-byte -mdynamic-no-pic stub. */
42524 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
42526 fprintf (file, "%s:\n", lazy_ptr_name);
42527 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42528 fprintf (file, ASM_LONG "%s\n", binder_name);
42530 #endif /* TARGET_MACHO */
42532 /* Order the registers for the register allocator. */
42534 void
42535 x86_order_regs_for_local_alloc (void)
42537 int pos = 0;
42538 int i;
42540 /* First allocate the local general purpose registers. */
42541 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42542 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42543 reg_alloc_order [pos++] = i;
42545 /* Global general purpose registers. */
42546 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42547 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42548 reg_alloc_order [pos++] = i;
42550 /* x87 registers come first in case we are doing FP math
42551 using them. */
42552 if (!TARGET_SSE_MATH)
42553 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42554 reg_alloc_order [pos++] = i;
42556 /* SSE registers. */
42557 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42558 reg_alloc_order [pos++] = i;
42559 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42560 reg_alloc_order [pos++] = i;
42562 /* Extended REX SSE registers. */
42563 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42564 reg_alloc_order [pos++] = i;
42566 /* Mask register. */
42567 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42568 reg_alloc_order [pos++] = i;
42570 /* MPX bound registers. */
42571 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42572 reg_alloc_order [pos++] = i;
42574 /* x87 registers. */
42575 if (TARGET_SSE_MATH)
42576 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42577 reg_alloc_order [pos++] = i;
42579 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42580 reg_alloc_order [pos++] = i;
42582 /* Initialize the rest of the array, as we do not allocate some registers
42583 at all. */
42584 while (pos < FIRST_PSEUDO_REGISTER)
42585 reg_alloc_order [pos++] = 0;
42588 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42589 in struct attribute_spec.handler. */
42590 static tree
42591 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42592 tree args,
42593 int,
42594 bool *no_add_attrs)
42596 if (TREE_CODE (*node) != FUNCTION_TYPE
42597 && TREE_CODE (*node) != METHOD_TYPE
42598 && TREE_CODE (*node) != FIELD_DECL
42599 && TREE_CODE (*node) != TYPE_DECL)
42601 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42602 name);
42603 *no_add_attrs = true;
42604 return NULL_TREE;
42606 if (TARGET_64BIT)
42608 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42609 name);
42610 *no_add_attrs = true;
42611 return NULL_TREE;
42613 if (is_attribute_p ("callee_pop_aggregate_return", name))
42615 tree cst;
42617 cst = TREE_VALUE (args);
42618 if (TREE_CODE (cst) != INTEGER_CST)
42620 warning (OPT_Wattributes,
42621 "%qE attribute requires an integer constant argument",
42622 name);
42623 *no_add_attrs = true;
42625 else if (compare_tree_int (cst, 0) != 0
42626 && compare_tree_int (cst, 1) != 0)
42628 warning (OPT_Wattributes,
42629 "argument to %qE attribute is neither zero, nor one",
42630 name);
42631 *no_add_attrs = true;
42634 return NULL_TREE;
42637 return NULL_TREE;
42640 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
42641 struct attribute_spec.handler. */
42642 static tree
42643 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42644 bool *no_add_attrs)
42646 if (TREE_CODE (*node) != FUNCTION_TYPE
42647 && TREE_CODE (*node) != METHOD_TYPE
42648 && TREE_CODE (*node) != FIELD_DECL
42649 && TREE_CODE (*node) != TYPE_DECL)
42651 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42652 name);
42653 *no_add_attrs = true;
42654 return NULL_TREE;
42657 /* Can combine regparm with all attributes but fastcall. */
42658 if (is_attribute_p ("ms_abi", name))
42660 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42662 error ("ms_abi and sysv_abi attributes are not compatible");
42665 return NULL_TREE;
42667 else if (is_attribute_p ("sysv_abi", name))
42669 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42671 error ("ms_abi and sysv_abi attributes are not compatible");
42674 return NULL_TREE;
42677 return NULL_TREE;
42680 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42681 struct attribute_spec.handler. */
42682 static tree
42683 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
42684 bool *no_add_attrs)
42686 tree *type = NULL;
42687 if (DECL_P (*node))
42689 if (TREE_CODE (*node) == TYPE_DECL)
42690 type = &TREE_TYPE (*node);
42692 else
42693 type = node;
42695 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
42697 warning (OPT_Wattributes, "%qE attribute ignored",
42698 name);
42699 *no_add_attrs = true;
42702 else if ((is_attribute_p ("ms_struct", name)
42703 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
42704 || ((is_attribute_p ("gcc_struct", name)
42705 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
42707 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
42708 name);
42709 *no_add_attrs = true;
42712 return NULL_TREE;
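/* Handle an attribute that is only valid on function declarations;
   warn and drop it when applied to anything else.  Arguments as in
   struct attribute_spec.handler.  */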
42715 static tree
42716 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
42717 bool *no_add_attrs)
42719 if (TREE_CODE (*node) != FUNCTION_DECL)
42721 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42722 name);
42723 *no_add_attrs = true;
42725 return NULL_TREE;
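/* Return true if RECORD_TYPE should use the MS bitfield layout rules:
   MS bitfield layout is enabled (e.g. via -mms-bitfields) and the type
   is not marked gcc_struct, or the type is explicitly marked ms_struct.  */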
42728 static bool
42729 ix86_ms_bitfield_layout_p (const_tree record_type)
42731 return ((TARGET_MS_BITFIELD_LAYOUT
42732 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
42733 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
42736 /* Returns an expression indicating where the this parameter is
42737 located on entry to the FUNCTION. */
42739 static rtx
42740 x86_this_parameter (tree function)
42742 tree type = TREE_TYPE (function);
42743 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
42744 int nregs;
42746 if (TARGET_64BIT)
42748 const int *parm_regs;
42750 if (ix86_function_type_abi (type) == MS_ABI)
42751 parm_regs = x86_64_ms_abi_int_parameter_registers;
42752 else
42753 parm_regs = x86_64_int_parameter_registers;
42754 return gen_rtx_REG (Pmode, parm_regs[aggr]);
42757 nregs = ix86_function_regparm (type, function);
42759 if (nregs > 0 && !stdarg_p (type))
42761 int regno;
42762 unsigned int ccvt = ix86_get_callcvt (type);
42764 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
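/* Under fastcall the this pointer is passed in ECX, or in EDX when a
   hidden aggregate-return pointer already occupies ECX.  */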
42765 regno = aggr ? DX_REG : CX_REG;
42766 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42768 regno = CX_REG;
42769 if (aggr)
42770 return gen_rtx_MEM (SImode,
42771 plus_constant (Pmode, stack_pointer_rtx, 4));
42773 else
42775 regno = AX_REG;
42776 if (aggr)
42778 regno = DX_REG;
42779 if (nregs == 1)
42780 return gen_rtx_MEM (SImode,
42781 plus_constant (Pmode,
42782 stack_pointer_rtx, 4));
42785 return gen_rtx_REG (SImode, regno);
42788 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
42789 aggr ? 8 : 4));
42792 /* Determine whether x86_output_mi_thunk can succeed. */
42794 static bool
42795 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
42796 const_tree function)
42798 /* 64-bit can handle anything. */
42799 if (TARGET_64BIT)
42800 return true;
42802 /* For 32-bit, everything's fine if we have one free register. */
42803 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
42804 return true;
42806 /* Need a free register for vcall_offset. */
42807 if (vcall_offset)
42808 return false;
42810 /* Need a free register for GOT references. */
42811 if (flag_pic && !targetm.binds_local_p (function))
42812 return false;
42814 /* Otherwise ok. */
42815 return true;
42818 /* Output the assembler code for a thunk function. THUNK_DECL is the
42819 declaration for the thunk function itself, FUNCTION is the decl for
42820 the target function. DELTA is an immediate constant offset to be
42821 added to THIS. If VCALL_OFFSET is nonzero, the word at
42822 *(*this + vcall_offset) should be added to THIS. */
42824 static void
42825 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
42826 HOST_WIDE_INT vcall_offset, tree function)
42828 rtx this_param = x86_this_parameter (function);
42829 rtx this_reg, tmp, fnaddr;
42830 unsigned int tmp_regno;
42831 rtx_insn *insn;
42833 if (TARGET_64BIT)
42834 tmp_regno = R10_REG;
42835 else
42837 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
42838 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42839 tmp_regno = AX_REG;
42840 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42841 tmp_regno = DX_REG;
42842 else
42843 tmp_regno = CX_REG;
42846 emit_note (NOTE_INSN_PROLOGUE_END);
42848 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
42849 pull it in now and let DELTA benefit. */
42850 if (REG_P (this_param))
42851 this_reg = this_param;
42852 else if (vcall_offset)
42854 /* Put the this parameter into %eax. */
42855 this_reg = gen_rtx_REG (Pmode, AX_REG);
42856 emit_move_insn (this_reg, this_param);
42858 else
42859 this_reg = NULL_RTX;
42861 /* Adjust the this parameter by a fixed constant. */
42862 if (delta)
42864 rtx delta_rtx = GEN_INT (delta);
42865 rtx delta_dst = this_reg ? this_reg : this_param;
42867 if (TARGET_64BIT)
42869 if (!x86_64_general_operand (delta_rtx, Pmode))
42871 tmp = gen_rtx_REG (Pmode, tmp_regno);
42872 emit_move_insn (tmp, delta_rtx);
42873 delta_rtx = tmp;
42877 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
42880 /* Adjust the this parameter by a value stored in the vtable. */
42881 if (vcall_offset)
42883 rtx vcall_addr, vcall_mem, this_mem;
42885 tmp = gen_rtx_REG (Pmode, tmp_regno);
42887 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
42888 if (Pmode != ptr_mode)
42889 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
42890 emit_move_insn (tmp, this_mem);
42892 /* Adjust the this parameter. */
42893 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
42894 if (TARGET_64BIT
42895 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
42897 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
42898 emit_move_insn (tmp2, GEN_INT (vcall_offset));
42899 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
42902 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
42903 if (Pmode != ptr_mode)
42904 emit_insn (gen_addsi_1_zext (this_reg,
42905 gen_rtx_REG (ptr_mode,
42906 REGNO (this_reg)),
42907 vcall_mem));
42908 else
42909 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
42912 /* If necessary, drop THIS back to its stack slot. */
42913 if (this_reg && this_reg != this_param)
42914 emit_move_insn (this_param, this_reg);
42916 fnaddr = XEXP (DECL_RTL (function), 0);
42917 if (TARGET_64BIT)
42919 if (!flag_pic || targetm.binds_local_p (function)
42920 || TARGET_PECOFF)
42922 else
42924 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
42925 tmp = gen_rtx_CONST (Pmode, tmp);
42926 fnaddr = gen_const_mem (Pmode, tmp);
42929 else
42931 if (!flag_pic || targetm.binds_local_p (function))
42933 #if TARGET_MACHO
42934 else if (TARGET_MACHO)
42936 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
42937 fnaddr = XEXP (fnaddr, 0);
42939 #endif /* TARGET_MACHO */
42940 else
42942 tmp = gen_rtx_REG (Pmode, CX_REG);
42943 output_set_got (tmp, NULL_RTX);
42945 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
42946 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
42947 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
42948 fnaddr = gen_const_mem (Pmode, fnaddr);
42952 /* Our sibling call patterns do not allow memories, because we have no
42953 predicate that can distinguish between frame and non-frame memory.
42954 For our purposes here, we can get away with (ab)using a jump pattern,
42955 because we're going to do no optimization. */
42956 if (MEM_P (fnaddr))
42958 if (sibcall_insn_operand (fnaddr, word_mode))
42960 fnaddr = XEXP (DECL_RTL (function), 0);
42961 tmp = gen_rtx_MEM (QImode, fnaddr);
42962 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42963 tmp = emit_call_insn (tmp);
42964 SIBLING_CALL_P (tmp) = 1;
42966 else
42967 emit_jump_insn (gen_indirect_jump (fnaddr));
42969 else
42971 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
42973 // CM_LARGE_PIC always uses a pseudo PIC register which is
42974 // uninitialized. Since FUNCTION is local and calling it
42975 // doesn't go through PLT, we use scratch register %r11 as
42976 // PIC register and initialize it here.
42977 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
42978 ix86_init_large_pic_reg (tmp_regno);
42979 fnaddr = legitimize_pic_address (fnaddr,
42980 gen_rtx_REG (Pmode, tmp_regno));
42983 if (!sibcall_insn_operand (fnaddr, word_mode))
42985 tmp = gen_rtx_REG (word_mode, tmp_regno);
42986 if (GET_MODE (fnaddr) != word_mode)
42987 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
42988 emit_move_insn (tmp, fnaddr);
42989 fnaddr = tmp;
42992 tmp = gen_rtx_MEM (QImode, fnaddr);
42993 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42994 tmp = emit_call_insn (tmp);
42995 SIBLING_CALL_P (tmp) = 1;
42997 emit_barrier ();
42999 /* Emit just enough of rest_of_compilation to get the insns emitted.
43000 Note that use_thunk calls assemble_start_function et al. */
43001 insn = get_insns ();
43002 shorten_branches (insn);
43003 final_start_function (insn, file, 1);
43004 final (insn, file, 1);
43005 final_end_function ();
43008 static void
43009 x86_file_start (void)
43011 default_file_start ();
43012 if (TARGET_16BIT)
43013 fputs ("\t.code16gcc\n", asm_out_file);
43014 #if TARGET_MACHO
43015 darwin_file_start ();
43016 #endif
43017 if (X86_FILE_START_VERSION_DIRECTIVE)
43018 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
43019 if (X86_FILE_START_FLTUSED)
43020 fputs ("\t.global\t__fltused\n", asm_out_file);
43021 if (ix86_asm_dialect == ASM_INTEL)
43022 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
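/* Compute the alignment for FIELD, whose natural alignment so far is
   COMPUTED bits.  On ia32 without -malign-double, double, long long and
   similar fields are capped at 32-bit alignment.  */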
43026 x86_field_alignment (tree field, int computed)
43028 machine_mode mode;
43029 tree type = TREE_TYPE (field);
43031 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
43032 return computed;
43033 mode = TYPE_MODE (strip_array_types (type));
43034 if (mode == DFmode || mode == DCmode
43035 || GET_MODE_CLASS (mode) == MODE_INT
43036 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
43037 return MIN (32, computed);
43038 return computed;
43041 /* Print call to TARGET to FILE. */
43043 static void
43044 x86_print_call_or_nop (FILE *file, const char *target)
43046 if (flag_nop_mcount)
43047 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
43048 else
43049 fprintf (file, "1:\tcall\t%s\n", target);
43052 /* Output assembler code to FILE to increment profiler label # LABELNO
43053 for profiling a function entry. */
43054 void
43055 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
43057 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
43058 : MCOUNT_NAME);
43059 if (TARGET_64BIT)
43061 #ifndef NO_PROFILE_COUNTERS
43062 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
43063 #endif
43065 if (!TARGET_PECOFF && flag_pic)
43066 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
43067 else
43068 x86_print_call_or_nop (file, mcount_name);
43070 else if (flag_pic)
43072 #ifndef NO_PROFILE_COUNTERS
43073 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43074 LPREFIX, labelno);
43075 #endif
43076 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43078 else
43080 #ifndef NO_PROFILE_COUNTERS
43081 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43082 LPREFIX, labelno);
43083 #endif
43084 x86_print_call_or_nop (file, mcount_name);
43087 if (flag_record_mcount)
43089 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43090 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43091 fprintf (file, "\t.previous\n");
43095 /* We don't have exact information about the insn sizes, but we may assume
43096 quite safely that we are informed about all 1-byte insns and memory
43097 address sizes. This is enough to eliminate unnecessary padding in
43098 99% of cases. */
43100 static int
43101 min_insn_size (rtx_insn *insn)
43103 int l = 0, len;
43105 if (!INSN_P (insn) || !active_insn_p (insn))
43106 return 0;
43108 /* Discard alignments we've emitted, and jump instructions. */
43109 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43110 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43111 return 0;
43113 /* Important case - calls are always 5 bytes.
43114 It is common to have many calls in a row. */
43115 if (CALL_P (insn)
43116 && symbolic_reference_mentioned_p (PATTERN (insn))
43117 && !SIBLING_CALL_P (insn))
43118 return 5;
43119 len = get_attr_length (insn);
43120 if (len <= 1)
43121 return 1;
43123 /* For normal instructions we rely on get_attr_length being exact,
43124 with a few exceptions. */
43125 if (!JUMP_P (insn))
43127 enum attr_type type = get_attr_type (insn);
43129 switch (type)
43131 case TYPE_MULTI:
43132 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43133 || asm_noperands (PATTERN (insn)) >= 0)
43134 return 0;
43135 break;
43136 case TYPE_OTHER:
43137 case TYPE_FCMP:
43138 break;
43139 default:
43140 /* Otherwise trust get_attr_length. */
43141 return len;
43144 l = get_attr_length_address (insn);
43145 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43146 l = 4;
43148 if (l)
43149 return 1+l;
43150 else
43151 return 2;
43154 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43156 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte
43157 window. */
43159 static void
43160 ix86_avoid_jump_mispredicts (void)
43162 rtx_insn *insn, *start = get_insns ();
43163 int nbytes = 0, njumps = 0;
43164 bool isjump = false;
43166 /* Look for all minimal intervals of instructions containing 4 jumps.
43167 The intervals are bounded by START and INSN. NBYTES is the total
43168 size of instructions in the interval including INSN and not including
43169 START. When NBYTES is smaller than 16, it is possible
43170 that the end of START and INSN end up in the same 16-byte page.
43172 The smallest offset in the page at which INSN can start is in the case
43173 where START ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
43174 We add a p2align to the 16-byte window with a max skip of 15 - NBYTES + sizeof (INSN).
43176 Don't consider asm goto as a jump: while it can contain a jump, it doesn't
43177 have to, control transfer to the label(s) can be performed through other
43178 means, and we also estimate the minimum length of all asm stmts as 0. */
43179 for (insn = start; insn; insn = NEXT_INSN (insn))
43181 int min_size;
43183 if (LABEL_P (insn))
43185 int align = label_to_alignment (insn);
43186 int max_skip = label_to_max_skip (insn);
43188 if (max_skip > 15)
43189 max_skip = 15;
43190 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43191 already in the current 16-byte page, because otherwise
43192 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43193 bytes to reach a 16-byte boundary. */
43194 if (align <= 0
43195 || (align <= 3 && max_skip != (1 << align) - 1))
43196 max_skip = 0;
43197 if (dump_file)
43198 fprintf (dump_file, "Label %i with max_skip %i\n",
43199 INSN_UID (insn), max_skip);
43200 if (max_skip)
43202 while (nbytes + max_skip >= 16)
43204 start = NEXT_INSN (start);
43205 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43206 || CALL_P (start))
43207 njumps--, isjump = true;
43208 else
43209 isjump = false;
43210 nbytes -= min_insn_size (start);
43213 continue;
43216 min_size = min_insn_size (insn);
43217 nbytes += min_size;
43218 if (dump_file)
43219 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43220 INSN_UID (insn), min_size);
43221 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43222 || CALL_P (insn))
43223 njumps++;
43224 else
43225 continue;
43227 while (njumps > 3)
43229 start = NEXT_INSN (start);
43230 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43231 || CALL_P (start))
43232 njumps--, isjump = true;
43233 else
43234 isjump = false;
43235 nbytes -= min_insn_size (start);
43237 gcc_assert (njumps >= 0);
43238 if (dump_file)
43239 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43240 INSN_UID (start), INSN_UID (insn), nbytes);
43242 if (njumps == 3 && isjump && nbytes < 16)
43244 int padsize = 15 - nbytes + min_insn_size (insn);
43246 if (dump_file)
43247 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43248 INSN_UID (insn), padsize);
43249 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43253 #endif
43255 /* AMD Athlon works faster
43256 when RET is not the destination of a conditional jump or directly preceded
43257 by another jump instruction. We avoid the penalty by inserting a NOP just
43258 before the RET instruction in such cases. */
43259 static void
43260 ix86_pad_returns (void)
43262 edge e;
43263 edge_iterator ei;
43265 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43267 basic_block bb = e->src;
43268 rtx_insn *ret = BB_END (bb);
43269 rtx_insn *prev;
43270 bool replace = false;
43272 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43273 || optimize_bb_for_size_p (bb))
43274 continue;
43275 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43276 if (active_insn_p (prev) || LABEL_P (prev))
43277 break;
43278 if (prev && LABEL_P (prev))
43280 edge e;
43281 edge_iterator ei;
43283 FOR_EACH_EDGE (e, ei, bb->preds)
43284 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43285 && !(e->flags & EDGE_FALLTHRU))
43287 replace = true;
43288 break;
43291 if (!replace)
43293 prev = prev_active_insn (ret);
43294 if (prev
43295 && ((JUMP_P (prev) && any_condjump_p (prev))
43296 || CALL_P (prev)))
43297 replace = true;
43298 /* Empty functions get a branch mispredict even when
43299 the jump destination is not visible to us. */
43300 if (!prev && !optimize_function_for_size_p (cfun))
43301 replace = true;
43303 if (replace)
43305 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43306 delete_insn (ret);
43311 /* Count the minimum number of instructions in BB. Return 4 if the
43312 number of instructions >= 4. */
43314 static int
43315 ix86_count_insn_bb (basic_block bb)
43317 rtx_insn *insn;
43318 int insn_count = 0;
43320 /* Count number of instructions in this block. Return 4 if the number
43321 of instructions >= 4. */
43322 FOR_BB_INSNS (bb, insn)
43324 /* Only happens in exit blocks. */
43325 if (JUMP_P (insn)
43326 && ANY_RETURN_P (PATTERN (insn)))
43327 break;
43329 if (NONDEBUG_INSN_P (insn)
43330 && GET_CODE (PATTERN (insn)) != USE
43331 && GET_CODE (PATTERN (insn)) != CLOBBER)
43333 insn_count++;
43334 if (insn_count >= 4)
43335 return insn_count;
43339 return insn_count;
43343 /* Count the minimum number of instructions in code path in BB.
43344 Return 4 if the number of instructions >= 4. */
43346 static int
43347 ix86_count_insn (basic_block bb)
43349 edge e;
43350 edge_iterator ei;
43351 int min_prev_count;
43353 /* Only bother counting instructions along paths with no
43354 more than 2 basic blocks between entry and exit. Given
43355 that BB has an edge to exit, determine if a predecessor
43356 of BB has an edge from entry. If so, compute the number
43357 of instructions in the predecessor block. If there
43358 happen to be multiple such blocks, compute the minimum. */
43359 min_prev_count = 4;
43360 FOR_EACH_EDGE (e, ei, bb->preds)
43362 edge prev_e;
43363 edge_iterator prev_ei;
43365 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43367 min_prev_count = 0;
43368 break;
43370 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43372 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43374 int count = ix86_count_insn_bb (e->src);
43375 if (count < min_prev_count)
43376 min_prev_count = count;
43377 break;
43382 if (min_prev_count < 4)
43383 min_prev_count += ix86_count_insn_bb (bb);
43385 return min_prev_count;
43388 /* Pad short function to 4 instructions. */
43390 static void
43391 ix86_pad_short_function (void)
43393 edge e;
43394 edge_iterator ei;
43396 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43398 rtx_insn *ret = BB_END (e->src);
43399 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43401 int insn_count = ix86_count_insn (e->src);
43403 /* Pad short function. */
43404 if (insn_count < 4)
43406 rtx_insn *insn = ret;
43408 /* Find epilogue. */
43409 while (insn
43410 && (!NOTE_P (insn)
43411 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43412 insn = PREV_INSN (insn);
43414 if (!insn)
43415 insn = ret;
43417 /* Two NOPs count as one instruction. */
43418 insn_count = 2 * (4 - insn_count);
43419 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43425 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43426 the epilogue, the Windows system unwinder will apply epilogue logic and
43427 produce incorrect offsets. This can be avoided by adding a nop between
43428 the last insn that can throw and the first insn of the epilogue. */
43430 static void
43431 ix86_seh_fixup_eh_fallthru (void)
43433 edge e;
43434 edge_iterator ei;
43436 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43438 rtx_insn *insn, *next;
43440 /* Find the beginning of the epilogue. */
43441 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43442 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43443 break;
43444 if (insn == NULL)
43445 continue;
43447 /* We only care about preceding insns that can throw. */
43448 insn = prev_active_insn (insn);
43449 if (insn == NULL || !can_throw_internal (insn))
43450 continue;
43452 /* Do not separate calls from their debug information. */
43453 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43454 if (NOTE_P (next)
43455 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43456 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43457 insn = next;
43458 else
43459 break;
43461 emit_insn_after (gen_nops (const1_rtx), insn);
43465 /* Implement machine specific optimizations. We implement padding of returns
43466 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
43467 static void
43468 ix86_reorg (void)
43470 /* We are freeing block_for_insn in the toplev to keep compatibility
43471 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43472 compute_bb_for_insn ();
43474 if (TARGET_SEH && current_function_has_exception_handlers ())
43475 ix86_seh_fixup_eh_fallthru ();
43477 if (optimize && optimize_function_for_speed_p (cfun))
43479 if (TARGET_PAD_SHORT_FUNCTION)
43480 ix86_pad_short_function ();
43481 else if (TARGET_PAD_RETURNS)
43482 ix86_pad_returns ();
43483 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43484 if (TARGET_FOUR_JUMP_LIMIT)
43485 ix86_avoid_jump_mispredicts ();
43486 #endif
43490 /* Return nonzero when a QImode register that must be represented via a REX prefix
43491 is used. */
43492 bool
43493 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43495 int i;
43496 extract_insn_cached (insn);
43497 for (i = 0; i < recog_data.n_operands; i++)
43498 if (GENERAL_REG_P (recog_data.operand[i])
43499 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43500 return true;
43501 return false;
43504 /* Return true when INSN mentions a register that must be encoded using a REX
43505 prefix. */
43506 bool
43507 x86_extended_reg_mentioned_p (rtx insn)
43509 subrtx_iterator::array_type array;
43510 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43512 const_rtx x = *iter;
43513 if (REG_P (x)
43514 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43515 return true;
43517 return false;
43520 /* If profitable, negate (without causing overflow) integer constant
43521 of mode MODE at location LOC. Return true in this case. */
43522 bool
43523 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43525 HOST_WIDE_INT val;
43527 if (!CONST_INT_P (*loc))
43528 return false;
43530 switch (mode)
43532 case DImode:
43533 /* DImode x86_64 constants must fit in 32 bits. */
43534 gcc_assert (x86_64_immediate_operand (*loc, mode));
43536 mode = SImode;
43537 break;
43539 case SImode:
43540 case HImode:
43541 case QImode:
43542 break;
43544 default:
43545 gcc_unreachable ();
43548 /* Avoid overflows. */
43549 if (mode_signbit_p (mode, *loc))
43550 return false;
43552 val = INTVAL (*loc);
43554 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
43555 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
43556 if ((val < 0 && val != -128)
43557 || val == 128)
43559 *loc = GEN_INT (-val);
43560 return true;
43563 return false;
43566 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43567 optabs would emit if we didn't have TFmode patterns. */
43569 void
43570 x86_emit_floatuns (rtx operands[2])
43572 rtx_code_label *neglab, *donelab;
43573 rtx i0, i1, f0, in, out;
43574 machine_mode mode, inmode;
43576 inmode = GET_MODE (operands[1]);
43577 gcc_assert (inmode == SImode || inmode == DImode);
43579 out = operands[0];
43580 in = force_reg (inmode, operands[1]);
43581 mode = GET_MODE (out);
43582 neglab = gen_label_rtx ();
43583 donelab = gen_label_rtx ();
43584 f0 = gen_reg_rtx (mode);
43586 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43588 expand_float (out, in, 0);
43590 emit_jump_insn (gen_jump (donelab));
43591 emit_barrier ();
43593 emit_label (neglab);
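/* The input has its high bit set when viewed as signed, so halve it with
   the discarded low bit folded back in ((in >> 1) | (in & 1)) to keep the
   rounding correct, convert the halved value, and double the result.  */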
43595 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43596 1, OPTAB_DIRECT);
43597 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43598 1, OPTAB_DIRECT);
43599 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43601 expand_float (f0, i0, 0);
43603 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
43605 emit_label (donelab);
43608 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43609 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43610 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43611 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43613 /* Get a vector mode of the same size as the original but with elements
43614 twice as wide. This is only guaranteed to apply to integral vectors. */
43616 static inline machine_mode
43617 get_mode_wider_vector (machine_mode o)
43619 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43620 machine_mode n = GET_MODE_WIDER_MODE (o);
43621 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43622 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43623 return n;
43626 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43627 fill target with val via vec_duplicate. */
43629 static bool
43630 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43632 bool ok;
43633 rtx_insn *insn;
43634 rtx dup;
43636 /* First attempt to recognize VAL as-is. */
43637 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43638 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
43639 if (recog_memoized (insn) < 0)
43641 rtx_insn *seq;
43642 /* If that fails, force VAL into a register. */
43644 start_sequence ();
43645 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43646 seq = get_insns ();
43647 end_sequence ();
43648 if (seq)
43649 emit_insn_before (seq, insn);
43651 ok = recog_memoized (insn) >= 0;
43652 gcc_assert (ok);
43654 return true;
43657 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43658 with all elements equal to VAR. Return true if successful. */
43660 static bool
43661 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43662 rtx target, rtx val)
43664 bool ok;
43666 switch (mode)
43668 case V2SImode:
43669 case V2SFmode:
43670 if (!mmx_ok)
43671 return false;
43672 /* FALLTHRU */
43674 case V4DFmode:
43675 case V4DImode:
43676 case V8SFmode:
43677 case V8SImode:
43678 case V2DFmode:
43679 case V2DImode:
43680 case V4SFmode:
43681 case V4SImode:
43682 case V16SImode:
43683 case V8DImode:
43684 case V16SFmode:
43685 case V8DFmode:
43686 return ix86_vector_duplicate_value (mode, target, val);
43688 case V4HImode:
43689 if (!mmx_ok)
43690 return false;
43691 if (TARGET_SSE || TARGET_3DNOW_A)
43693 rtx x;
43695 val = gen_lowpart (SImode, val);
43696 x = gen_rtx_TRUNCATE (HImode, val);
43697 x = gen_rtx_VEC_DUPLICATE (mode, x);
43698 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43699 return true;
43701 goto widen;
43703 case V8QImode:
43704 if (!mmx_ok)
43705 return false;
43706 goto widen;
43708 case V8HImode:
43709 if (TARGET_AVX2)
43710 return ix86_vector_duplicate_value (mode, target, val);
43712 if (TARGET_SSE2)
43714 struct expand_vec_perm_d dperm;
43715 rtx tmp1, tmp2;
43717 permute:
43718 memset (&dperm, 0, sizeof (dperm));
43719 dperm.target = target;
43720 dperm.vmode = mode;
43721 dperm.nelt = GET_MODE_NUNITS (mode);
43722 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
43723 dperm.one_operand_p = true;
43725 /* Extend to SImode using a paradoxical SUBREG. */
43726 tmp1 = gen_reg_rtx (SImode);
43727 emit_move_insn (tmp1, gen_lowpart (SImode, val));
43729 /* Insert the SImode value as low element of a V4SImode vector. */
43730 tmp2 = gen_reg_rtx (V4SImode);
43731 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
43732 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
43734 ok = (expand_vec_perm_1 (&dperm)
43735 || expand_vec_perm_broadcast_1 (&dperm));
43736 gcc_assert (ok);
43737 return ok;
43739 goto widen;
43741 case V16QImode:
43742 if (TARGET_AVX2)
43743 return ix86_vector_duplicate_value (mode, target, val);
43745 if (TARGET_SSE2)
43746 goto permute;
43747 goto widen;
43749 widen:
43750 /* Replicate the value once into the next wider mode and recurse. */
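/* E.g. broadcasting a QImode value into V8QImode first forms
   (val | (val << 8)) in HImode and broadcasts that in V4HImode.  */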
43752 machine_mode smode, wsmode, wvmode;
43753 rtx x;
43755 smode = GET_MODE_INNER (mode);
43756 wvmode = get_mode_wider_vector (mode);
43757 wsmode = GET_MODE_INNER (wvmode);
43759 val = convert_modes (wsmode, smode, val, true);
43760 x = expand_simple_binop (wsmode, ASHIFT, val,
43761 GEN_INT (GET_MODE_BITSIZE (smode)),
43762 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43763 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
43765 x = gen_reg_rtx (wvmode);
43766 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
43767 gcc_assert (ok);
43768 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
43769 return ok;
43772 case V16HImode:
43773 case V32QImode:
43774 if (TARGET_AVX2)
43775 return ix86_vector_duplicate_value (mode, target, val);
43776 else
43778 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
43779 rtx x = gen_reg_rtx (hvmode);
43781 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43782 gcc_assert (ok);
43784 x = gen_rtx_VEC_CONCAT (mode, x, x);
43785 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43787 return true;
43789 case V64QImode:
43790 case V32HImode:
43791 if (TARGET_AVX512BW)
43792 return ix86_vector_duplicate_value (mode, target, val);
43793 else
43795 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
43796 rtx x = gen_reg_rtx (hvmode);
43798 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43799 gcc_assert (ok);
43801 x = gen_rtx_VEC_CONCAT (mode, x, x);
43802 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43804 return true;
43806 default:
43807 return false;
43811 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43812 whose ONE_VAR element is VAR, and other elements are zero. Return true
43813 if successful. */
43815 static bool
43816 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
43817 rtx target, rtx var, int one_var)
43819 machine_mode vsimode;
43820 rtx new_target;
43821 rtx x, tmp;
43822 bool use_vector_set = false;
43824 switch (mode)
43826 case V2DImode:
43827 /* For SSE4.1, we normally use vector set. But if the second
43828 element is zero and inter-unit moves are OK, we use movq
43829 instead. */
43830 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
43831 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
43832 && one_var == 0));
43833 break;
43834 case V16QImode:
43835 case V4SImode:
43836 case V4SFmode:
43837 use_vector_set = TARGET_SSE4_1;
43838 break;
43839 case V8HImode:
43840 use_vector_set = TARGET_SSE2;
43841 break;
43842 case V4HImode:
43843 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
43844 break;
43845 case V32QImode:
43846 case V16HImode:
43847 case V8SImode:
43848 case V8SFmode:
43849 case V4DFmode:
43850 use_vector_set = TARGET_AVX;
43851 break;
43852 case V4DImode:
43853 /* Use ix86_expand_vector_set in 64bit mode only. */
43854 use_vector_set = TARGET_AVX && TARGET_64BIT;
43855 break;
43856 default:
43857 break;
43860 if (use_vector_set)
43862 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
43863 var = force_reg (GET_MODE_INNER (mode), var);
43864 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43865 return true;
43868 switch (mode)
43870 case V2SFmode:
43871 case V2SImode:
43872 if (!mmx_ok)
43873 return false;
43874 /* FALLTHRU */
43876 case V2DFmode:
43877 case V2DImode:
43878 if (one_var != 0)
43879 return false;
43880 var = force_reg (GET_MODE_INNER (mode), var);
43881 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
43882 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43883 return true;
43885 case V4SFmode:
43886 case V4SImode:
43887 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
43888 new_target = gen_reg_rtx (mode);
43889 else
43890 new_target = target;
43891 var = force_reg (GET_MODE_INNER (mode), var);
43892 x = gen_rtx_VEC_DUPLICATE (mode, var);
43893 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
43894 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
43895 if (one_var != 0)
43897 /* We need to shuffle the value to the correct position, so
43898 create a new pseudo to store the intermediate result. */
43900 /* With SSE2, we can use the integer shuffle insns. */
43901 if (mode != V4SFmode && TARGET_SSE2)
43903 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
43904 const1_rtx,
43905 GEN_INT (one_var == 1 ? 0 : 1),
43906 GEN_INT (one_var == 2 ? 0 : 1),
43907 GEN_INT (one_var == 3 ? 0 : 1)));
43908 if (target != new_target)
43909 emit_move_insn (target, new_target);
43910 return true;
43913 /* Otherwise convert the intermediate result to V4SFmode and
43914 use the SSE1 shuffle instructions. */
43915 if (mode != V4SFmode)
43917 tmp = gen_reg_rtx (V4SFmode);
43918 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
43920 else
43921 tmp = new_target;
43923 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
43924 const1_rtx,
43925 GEN_INT (one_var == 1 ? 0 : 1),
43926 GEN_INT (one_var == 2 ? 0+4 : 1+4),
43927 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
43929 if (mode != V4SFmode)
43930 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
43931 else if (tmp != target)
43932 emit_move_insn (target, tmp);
43934 else if (target != new_target)
43935 emit_move_insn (target, new_target);
43936 return true;
43938 case V8HImode:
43939 case V16QImode:
43940 vsimode = V4SImode;
43941 goto widen;
43942 case V4HImode:
43943 case V8QImode:
43944 if (!mmx_ok)
43945 return false;
43946 vsimode = V2SImode;
43947 goto widen;
43948 widen:
43949 if (one_var != 0)
43950 return false;
43952 /* Zero extend the variable element to SImode and recurse. */
43953 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
43955 x = gen_reg_rtx (vsimode);
43956 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
43957 var, one_var))
43958 gcc_unreachable ();
43960 emit_move_insn (target, gen_lowpart (mode, x));
43961 return true;
43963 default:
43964 return false;
43968 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43969 consisting of the values in VALS. It is known that all elements
43970 except ONE_VAR are constants. Return true if successful. */
43972 static bool
43973 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
43974 rtx target, rtx vals, int one_var)
43976 rtx var = XVECEXP (vals, 0, one_var);
43977 machine_mode wmode;
43978 rtx const_vec, x;
43980 const_vec = copy_rtx (vals);
43981 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
43982 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
43984 switch (mode)
43986 case V2DFmode:
43987 case V2DImode:
43988 case V2SFmode:
43989 case V2SImode:
43990 /* For the two element vectors, it's just as easy to use
43991 the general case. */
43992 return false;
43994 case V4DImode:
43995 /* Use ix86_expand_vector_set in 64bit mode only. */
43996 if (!TARGET_64BIT)
43997 return false;
43998 case V4DFmode:
43999 case V8SFmode:
44000 case V8SImode:
44001 case V16HImode:
44002 case V32QImode:
44003 case V4SFmode:
44004 case V4SImode:
44005 case V8HImode:
44006 case V4HImode:
44007 break;
44009 case V16QImode:
44010 if (TARGET_SSE4_1)
44011 break;
44012 wmode = V8HImode;
44013 goto widen;
44014 case V8QImode:
44015 wmode = V4HImode;
44016 goto widen;
44017 widen:
44018 /* There's no way to set one QImode entry easily. Combine
44019 the variable value with its adjacent constant value, and
44020 promote to an HImode set. */
44021 x = XVECEXP (vals, 0, one_var ^ 1);
44022 if (one_var & 1)
44024 var = convert_modes (HImode, QImode, var, true);
44025 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
44026 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44027 x = GEN_INT (INTVAL (x) & 0xff);
44029 else
44031 var = convert_modes (HImode, QImode, var, true);
44032 x = gen_int_mode (INTVAL (x) << 8, HImode);
44034 if (x != const0_rtx)
44035 var = expand_simple_binop (HImode, IOR, var, x, var,
44036 1, OPTAB_LIB_WIDEN);
44038 x = gen_reg_rtx (wmode);
44039 emit_move_insn (x, gen_lowpart (wmode, const_vec));
44040 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
44042 emit_move_insn (target, gen_lowpart (mode, x));
44043 return true;
44045 default:
44046 return false;
44049 emit_move_insn (target, const_vec);
44050 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44051 return true;
44054 /* A subroutine of ix86_expand_vector_init_general. Use vector
44055 concatenate to handle the most general case: all values variable,
44056 and none identical. */
44058 static void
44059 ix86_expand_vector_init_concat (machine_mode mode,
44060 rtx target, rtx *ops, int n)
44062 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
44063 rtx first[16], second[8], third[4];
44064 rtvec v;
44065 int i, j;
44067 switch (n)
44069 case 2:
44070 switch (mode)
44072 case V16SImode:
44073 cmode = V8SImode;
44074 break;
44075 case V16SFmode:
44076 cmode = V8SFmode;
44077 break;
44078 case V8DImode:
44079 cmode = V4DImode;
44080 break;
44081 case V8DFmode:
44082 cmode = V4DFmode;
44083 break;
44084 case V8SImode:
44085 cmode = V4SImode;
44086 break;
44087 case V8SFmode:
44088 cmode = V4SFmode;
44089 break;
44090 case V4DImode:
44091 cmode = V2DImode;
44092 break;
44093 case V4DFmode:
44094 cmode = V2DFmode;
44095 break;
44096 case V4SImode:
44097 cmode = V2SImode;
44098 break;
44099 case V4SFmode:
44100 cmode = V2SFmode;
44101 break;
44102 case V2DImode:
44103 cmode = DImode;
44104 break;
44105 case V2SImode:
44106 cmode = SImode;
44107 break;
44108 case V2DFmode:
44109 cmode = DFmode;
44110 break;
44111 case V2SFmode:
44112 cmode = SFmode;
44113 break;
44114 default:
44115 gcc_unreachable ();
44118 if (!register_operand (ops[1], cmode))
44119 ops[1] = force_reg (cmode, ops[1]);
44120 if (!register_operand (ops[0], cmode))
44121 ops[0] = force_reg (cmode, ops[0]);
44122 emit_insn (gen_rtx_SET (VOIDmode, target,
44123 gen_rtx_VEC_CONCAT (mode, ops[0],
44124 ops[1])));
44125 break;
44127 case 4:
44128 switch (mode)
44130 case V4DImode:
44131 cmode = V2DImode;
44132 break;
44133 case V4DFmode:
44134 cmode = V2DFmode;
44135 break;
44136 case V4SImode:
44137 cmode = V2SImode;
44138 break;
44139 case V4SFmode:
44140 cmode = V2SFmode;
44141 break;
44142 default:
44143 gcc_unreachable ();
44145 goto half;
44147 case 8:
44148 switch (mode)
44150 case V8DImode:
44151 cmode = V2DImode;
44152 hmode = V4DImode;
44153 break;
44154 case V8DFmode:
44155 cmode = V2DFmode;
44156 hmode = V4DFmode;
44157 break;
44158 case V8SImode:
44159 cmode = V2SImode;
44160 hmode = V4SImode;
44161 break;
44162 case V8SFmode:
44163 cmode = V2SFmode;
44164 hmode = V4SFmode;
44165 break;
44166 default:
44167 gcc_unreachable ();
44169 goto half;
44171 case 16:
44172 switch (mode)
44174 case V16SImode:
44175 cmode = V2SImode;
44176 hmode = V4SImode;
44177 gmode = V8SImode;
44178 break;
44179 case V16SFmode:
44180 cmode = V2SFmode;
44181 hmode = V4SFmode;
44182 gmode = V8SFmode;
44183 break;
44184 default:
44185 gcc_unreachable ();
44187 goto half;
44189 half:
44190 /* FIXME: We process inputs backward to help RA. PR 36222. */
44191 i = n - 1;
44192 j = (n >> 1) - 1;
44193 for (; i > 0; i -= 2, j--)
44195 first[j] = gen_reg_rtx (cmode);
44196 v = gen_rtvec (2, ops[i - 1], ops[i]);
44197 ix86_expand_vector_init (false, first[j],
44198 gen_rtx_PARALLEL (cmode, v));
44201 n >>= 1;
44202 if (n > 4)
44204 gcc_assert (hmode != VOIDmode);
44205 gcc_assert (gmode != VOIDmode);
44206 for (i = j = 0; i < n; i += 2, j++)
44208 second[j] = gen_reg_rtx (hmode);
44209 ix86_expand_vector_init_concat (hmode, second [j],
44210 &first [i], 2);
44212 n >>= 1;
44213 for (i = j = 0; i < n; i += 2, j++)
44215 third[j] = gen_reg_rtx (gmode);
44216 ix86_expand_vector_init_concat (gmode, third[j],
44217 &second[i], 2);
44219 n >>= 1;
44220 ix86_expand_vector_init_concat (mode, target, third, n);
44222 else if (n > 2)
44224 gcc_assert (hmode != VOIDmode);
44225 for (i = j = 0; i < n; i += 2, j++)
44227 second[j] = gen_reg_rtx (hmode);
44228 ix86_expand_vector_init_concat (hmode, second [j],
44229 &first [i], 2);
44231 n >>= 1;
44232 ix86_expand_vector_init_concat (mode, target, second, n);
44234 else
44235 ix86_expand_vector_init_concat (mode, target, first, n);
44236 break;
44238 default:
44239 gcc_unreachable ();
44243 /* A subroutine of ix86_expand_vector_init_general. Use vector
44244 interleave to handle the most general case: all values variable,
44245 and none identical. */
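/* As an illustration, for a hypothetical V16QImode build from bytes
   b0 ... b15 the loop below first forms eight vectors, each holding one
   adjacent pair {b0,b1}, {b2,b3}, ..., {b14,b15} in its lowest HImode
   element; the following "interleave low" passes then merge pairs of
   those vectors at ever wider element sizes until all sixteen bytes sit
   in their final positions.  */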
44247 static void
44248 ix86_expand_vector_init_interleave (machine_mode mode,
44249 rtx target, rtx *ops, int n)
44251 machine_mode first_imode, second_imode, third_imode, inner_mode;
44252 int i, j;
44253 rtx op0, op1;
44254 rtx (*gen_load_even) (rtx, rtx, rtx);
44255 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44256 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44258 switch (mode)
44260 case V8HImode:
44261 gen_load_even = gen_vec_setv8hi;
44262 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44263 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44264 inner_mode = HImode;
44265 first_imode = V4SImode;
44266 second_imode = V2DImode;
44267 third_imode = VOIDmode;
44268 break;
44269 case V16QImode:
44270 gen_load_even = gen_vec_setv16qi;
44271 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44272 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44273 inner_mode = QImode;
44274 first_imode = V8HImode;
44275 second_imode = V4SImode;
44276 third_imode = V2DImode;
44277 break;
44278 default:
44279 gcc_unreachable ();
44282 for (i = 0; i < n; i++)
44284 /* Extend the odd element to SImode using a paradoxical SUBREG. */
44285 op0 = gen_reg_rtx (SImode);
44286 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44288 /* Insert the SImode value as low element of V4SImode vector. */
44289 op1 = gen_reg_rtx (V4SImode);
44290 op0 = gen_rtx_VEC_MERGE (V4SImode,
44291 gen_rtx_VEC_DUPLICATE (V4SImode,
44292 op0),
44293 CONST0_RTX (V4SImode),
44294 const1_rtx);
44295 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
44297 /* Cast the V4SImode vector back to a vector in the original mode. */
44298 op0 = gen_reg_rtx (mode);
44299 emit_move_insn (op0, gen_lowpart (mode, op1));
44301 /* Load even elements into the second position. */
44302 emit_insn (gen_load_even (op0,
44303 force_reg (inner_mode,
44304 ops [i + i + 1]),
44305 const1_rtx));
44307 /* Cast vector to FIRST_IMODE vector. */
44308 ops[i] = gen_reg_rtx (first_imode);
44309 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44312 /* Interleave low FIRST_IMODE vectors. */
44313 for (i = j = 0; i < n; i += 2, j++)
44315 op0 = gen_reg_rtx (first_imode);
44316 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44318 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44319 ops[j] = gen_reg_rtx (second_imode);
44320 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44323 /* Interleave low SECOND_IMODE vectors. */
44324 switch (second_imode)
44326 case V4SImode:
44327 for (i = j = 0; i < n / 2; i += 2, j++)
44329 op0 = gen_reg_rtx (second_imode);
44330 emit_insn (gen_interleave_second_low (op0, ops[i],
44331 ops[i + 1]));
44333 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44334 vector. */
44335 ops[j] = gen_reg_rtx (third_imode);
44336 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44338 second_imode = V2DImode;
44339 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44340 /* FALLTHRU */
44342 case V2DImode:
44343 op0 = gen_reg_rtx (second_imode);
44344 emit_insn (gen_interleave_second_low (op0, ops[0],
44345 ops[1]));
44347 /* Cast the SECOND_IMODE vector back to a vector in the original
44348 mode. */
44349 emit_insn (gen_rtx_SET (VOIDmode, target,
44350 gen_lowpart (mode, op0)));
44351 break;
44353 default:
44354 gcc_unreachable ();
44358 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44359 all values variable, and none identical. */
44361 static void
44362 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44363 rtx target, rtx vals)
44365 rtx ops[64], op0, op1, op2, op3, op4, op5;
44366 machine_mode half_mode = VOIDmode;
44367 machine_mode quarter_mode = VOIDmode;
44368 int n, i;
44370 switch (mode)
44372 case V2SFmode:
44373 case V2SImode:
44374 if (!mmx_ok && !TARGET_SSE)
44375 break;
44376 /* FALLTHRU */
44378 case V16SImode:
44379 case V16SFmode:
44380 case V8DFmode:
44381 case V8DImode:
44382 case V8SFmode:
44383 case V8SImode:
44384 case V4DFmode:
44385 case V4DImode:
44386 case V4SFmode:
44387 case V4SImode:
44388 case V2DFmode:
44389 case V2DImode:
44390 n = GET_MODE_NUNITS (mode);
44391 for (i = 0; i < n; i++)
44392 ops[i] = XVECEXP (vals, 0, i);
44393 ix86_expand_vector_init_concat (mode, target, ops, n);
44394 return;
44396 case V32QImode:
44397 half_mode = V16QImode;
44398 goto half;
44400 case V16HImode:
44401 half_mode = V8HImode;
44402 goto half;
44404 half:
44405 n = GET_MODE_NUNITS (mode);
44406 for (i = 0; i < n; i++)
44407 ops[i] = XVECEXP (vals, 0, i);
44408 op0 = gen_reg_rtx (half_mode);
44409 op1 = gen_reg_rtx (half_mode);
44410 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44411 n >> 2);
44412 ix86_expand_vector_init_interleave (half_mode, op1,
44413 &ops [n >> 1], n >> 2);
44414 emit_insn (gen_rtx_SET (VOIDmode, target,
44415 gen_rtx_VEC_CONCAT (mode, op0, op1)));
44416 return;
44418 case V64QImode:
44419 quarter_mode = V16QImode;
44420 half_mode = V32QImode;
44421 goto quarter;
44423 case V32HImode:
44424 quarter_mode = V8HImode;
44425 half_mode = V16HImode;
44426 goto quarter;
44428 quarter:
44429 n = GET_MODE_NUNITS (mode);
44430 for (i = 0; i < n; i++)
44431 ops[i] = XVECEXP (vals, 0, i);
44432 op0 = gen_reg_rtx (quarter_mode);
44433 op1 = gen_reg_rtx (quarter_mode);
44434 op2 = gen_reg_rtx (quarter_mode);
44435 op3 = gen_reg_rtx (quarter_mode);
44436 op4 = gen_reg_rtx (half_mode);
44437 op5 = gen_reg_rtx (half_mode);
44438 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44439 n >> 3);
44440 ix86_expand_vector_init_interleave (quarter_mode, op1,
44441 &ops [n >> 2], n >> 3);
44442 ix86_expand_vector_init_interleave (quarter_mode, op2,
44443 &ops [n >> 1], n >> 3);
44444 ix86_expand_vector_init_interleave (quarter_mode, op3,
44445 &ops [(n >> 1) | (n >> 2)], n >> 3);
44446 emit_insn (gen_rtx_SET (VOIDmode, op4,
44447 gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44448 emit_insn (gen_rtx_SET (VOIDmode, op5,
44449 gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44450 emit_insn (gen_rtx_SET (VOIDmode, target,
44451 gen_rtx_VEC_CONCAT (mode, op4, op5)));
44452 return;
44454 case V16QImode:
44455 if (!TARGET_SSE4_1)
44456 break;
44457 /* FALLTHRU */
44459 case V8HImode:
44460 if (!TARGET_SSE2)
44461 break;
44463 /* Don't use ix86_expand_vector_init_interleave if we can't
44464 move from GPR to SSE register directly. */
44465 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44466 break;
44468 n = GET_MODE_NUNITS (mode);
44469 for (i = 0; i < n; i++)
44470 ops[i] = XVECEXP (vals, 0, i);
44471 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44472 return;
44474 case V4HImode:
44475 case V8QImode:
44476 break;
44478 default:
44479 gcc_unreachable ();
44483 int i, j, n_elts, n_words, n_elt_per_word;
44484 machine_mode inner_mode;
44485 rtx words[4], shift;
44487 inner_mode = GET_MODE_INNER (mode);
44488 n_elts = GET_MODE_NUNITS (mode);
44489 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44490 n_elt_per_word = n_elts / n_words;
44491 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
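/* As an illustration (hypothetical values): assuming a 32-bit word built
   from four QImode elements e0 ... e3 (lowest vector element first), the
   loop below computes

     word = e3;
     word = (word << 8) | e2;
     word = (word << 8) | e1;
     word = (word << 8) | e0;

   so each word holds its elements in little-endian order; the shift count
   is the element bit size.  */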
44493 for (i = 0; i < n_words; ++i)
44495 rtx word = NULL_RTX;
44497 for (j = 0; j < n_elt_per_word; ++j)
44499 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44500 elt = convert_modes (word_mode, inner_mode, elt, true);
44502 if (j == 0)
44503 word = elt;
44504 else
44506 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44507 word, 1, OPTAB_LIB_WIDEN);
44508 word = expand_simple_binop (word_mode, IOR, word, elt,
44509 word, 1, OPTAB_LIB_WIDEN);
44513 words[i] = word;
44516 if (n_words == 1)
44517 emit_move_insn (target, gen_lowpart (mode, words[0]));
44518 else if (n_words == 2)
44520 rtx tmp = gen_reg_rtx (mode);
44521 emit_clobber (tmp);
44522 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44523 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44524 emit_move_insn (target, tmp);
44526 else if (n_words == 4)
44528 rtx tmp = gen_reg_rtx (V4SImode);
44529 gcc_assert (word_mode == SImode);
44530 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44531 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44532 emit_move_insn (target, gen_lowpart (mode, tmp));
44534 else
44535 gcc_unreachable ();
44539 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44540 instructions unless MMX_OK is true. */
44542 void
44543 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44545 machine_mode mode = GET_MODE (target);
44546 machine_mode inner_mode = GET_MODE_INNER (mode);
44547 int n_elts = GET_MODE_NUNITS (mode);
44548 int n_var = 0, one_var = -1;
44549 bool all_same = true, all_const_zero = true;
44550 int i;
44551 rtx x;
44553 for (i = 0; i < n_elts; ++i)
44555 x = XVECEXP (vals, 0, i);
44556 if (!(CONST_SCALAR_INT_P (x)
44557 || CONST_DOUBLE_P (x)
44558 || CONST_FIXED_P (x)))
44559 n_var++, one_var = i;
44560 else if (x != CONST0_RTX (inner_mode))
44561 all_const_zero = false;
44562 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44563 all_same = false;
44566 /* Constants are best loaded from the constant pool. */
44567 if (n_var == 0)
44569 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44570 return;
44573 /* If all values are identical, broadcast the value. */
44574 if (all_same
44575 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44576 XVECEXP (vals, 0, 0)))
44577 return;
44579 /* Values where only one field is non-constant are best loaded from
44580 the pool and overwritten via move later. */
44581 if (n_var == 1)
44583 if (all_const_zero
44584 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44585 XVECEXP (vals, 0, one_var),
44586 one_var))
44587 return;
44589 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44590 return;
44593 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
44596 void
44597 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44599 machine_mode mode = GET_MODE (target);
44600 machine_mode inner_mode = GET_MODE_INNER (mode);
44601 machine_mode half_mode;
44602 bool use_vec_merge = false;
44603 rtx tmp;
44604 static rtx (*gen_extract[6][2]) (rtx, rtx)
44606 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44607 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44608 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44609 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44610 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44611 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44613 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44615 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44616 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44617 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44618 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44619 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44620 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44622 int i, j, n;
44624 switch (mode)
44626 case V2SFmode:
44627 case V2SImode:
44628 if (mmx_ok)
44630 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44631 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44632 if (elt == 0)
44633 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44634 else
44635 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44636 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44637 return;
44639 break;
44641 case V2DImode:
44642 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44643 if (use_vec_merge)
44644 break;
44646 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44647 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44648 if (elt == 0)
44649 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44650 else
44651 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44652 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44653 return;
44655 case V2DFmode:
44657 rtx op0, op1;
44659 /* For the two element vectors, we implement a VEC_CONCAT with
44660 the extraction of the other element. */
44662 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44663 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44665 if (elt == 0)
44666 op0 = val, op1 = tmp;
44667 else
44668 op0 = tmp, op1 = val;
44670 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44671 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44673 return;
44675 case V4SFmode:
44676 use_vec_merge = TARGET_SSE4_1;
44677 if (use_vec_merge)
44678 break;
44680 switch (elt)
44682 case 0:
44683 use_vec_merge = true;
44684 break;
44686 case 1:
44687 /* tmp = target = A B C D */
44688 tmp = copy_to_reg (target);
44689 /* target = A A B B */
44690 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44691 /* target = X A B B */
44692 ix86_expand_vector_set (false, target, val, 0);
44693 /* target = A X C D */
44694 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44695 const1_rtx, const0_rtx,
44696 GEN_INT (2+4), GEN_INT (3+4)));
44697 return;
44699 case 2:
44700 /* tmp = target = A B C D */
44701 tmp = copy_to_reg (target);
44702 /* tmp = X B C D */
44703 ix86_expand_vector_set (false, tmp, val, 0);
44704 /* target = A B X D */
44705 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44706 const0_rtx, const1_rtx,
44707 GEN_INT (0+4), GEN_INT (3+4)));
44708 return;
44710 case 3:
44711 /* tmp = target = A B C D */
44712 tmp = copy_to_reg (target);
44713 /* tmp = X B C D */
44714 ix86_expand_vector_set (false, tmp, val, 0);
44715 /* target = A B C X */
44716 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44717 const0_rtx, const1_rtx,
44718 GEN_INT (2+4), GEN_INT (0+4)));
44719 return;
44721 default:
44722 gcc_unreachable ();
44724 break;
44726 case V4SImode:
44727 use_vec_merge = TARGET_SSE4_1;
44728 if (use_vec_merge)
44729 break;
44731 /* Element 0 handled by vec_merge below. */
44732 if (elt == 0)
44734 use_vec_merge = true;
44735 break;
44738 if (TARGET_SSE2)
44740 /* With SSE2, use integer shuffles to swap element 0 and ELT,
44741 store into element 0, then shuffle them back. */
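/* As an illustration, for a hypothetical ELT == 2 the order built below
   is {2, 1, 0, 3}: the first pshufd swaps lanes 0 and 2, the scalar store
   lands in lane 0, and the second pshufd with the same order swaps the
   lanes back, leaving VAL in lane 2.  */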
44743 rtx order[4];
44745 order[0] = GEN_INT (elt);
44746 order[1] = const1_rtx;
44747 order[2] = const2_rtx;
44748 order[3] = GEN_INT (3);
44749 order[elt] = const0_rtx;
44751 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44752 order[1], order[2], order[3]));
44754 ix86_expand_vector_set (false, target, val, 0);
44756 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44757 order[1], order[2], order[3]));
44759 else
44761 /* For SSE1, we have to reuse the V4SF code. */
44762 rtx t = gen_reg_rtx (V4SFmode);
44763 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
44764 emit_move_insn (target, gen_lowpart (mode, t));
44766 return;
44768 case V8HImode:
44769 use_vec_merge = TARGET_SSE2;
44770 break;
44771 case V4HImode:
44772 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44773 break;
44775 case V16QImode:
44776 use_vec_merge = TARGET_SSE4_1;
44777 break;
44779 case V8QImode:
44780 break;
44782 case V32QImode:
44783 half_mode = V16QImode;
44784 j = 0;
44785 n = 16;
44786 goto half;
44788 case V16HImode:
44789 half_mode = V8HImode;
44790 j = 1;
44791 n = 8;
44792 goto half;
44794 case V8SImode:
44795 half_mode = V4SImode;
44796 j = 2;
44797 n = 4;
44798 goto half;
44800 case V4DImode:
44801 half_mode = V2DImode;
44802 j = 3;
44803 n = 2;
44804 goto half;
44806 case V8SFmode:
44807 half_mode = V4SFmode;
44808 j = 4;
44809 n = 4;
44810 goto half;
44812 case V4DFmode:
44813 half_mode = V2DFmode;
44814 j = 5;
44815 n = 2;
44816 goto half;
44818 half:
44819 /* Compute offset. */
44820 i = elt / n;
44821 elt %= n;
44823 gcc_assert (i <= 1);
44825 /* Extract the half. */
44826 tmp = gen_reg_rtx (half_mode);
44827 emit_insn (gen_extract[j][i] (tmp, target));
44829 /* Put val in tmp at elt. */
44830 ix86_expand_vector_set (false, tmp, val, elt);
44832 /* Put it back. */
44833 emit_insn (gen_insert[j][i] (target, target, tmp));
44834 return;
44836 case V8DFmode:
44837 if (TARGET_AVX512F)
44839 tmp = gen_reg_rtx (mode);
44840 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44841 gen_rtx_VEC_DUPLICATE (mode, val)));
44842 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
44843 force_reg (QImode, GEN_INT (1 << elt))));
44844 return;
44846 else
44847 break;
44848 case V8DImode:
44849 if (TARGET_AVX512F)
44851 tmp = gen_reg_rtx (mode);
44852 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44853 gen_rtx_VEC_DUPLICATE (mode, val)));
44854 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
44855 force_reg (QImode, GEN_INT (1 << elt))));
44856 return;
44858 else
44859 break;
44860 case V16SFmode:
44861 if (TARGET_AVX512F)
44863 tmp = gen_reg_rtx (mode);
44864 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44865 gen_rtx_VEC_DUPLICATE (mode, val)));
44866 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
44867 force_reg (HImode, GEN_INT (1 << elt))));
44868 return;
44870 else
44871 break;
44872 case V16SImode:
44873 if (TARGET_AVX512F)
44875 tmp = gen_reg_rtx (mode);
44876 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44877 gen_rtx_VEC_DUPLICATE (mode, val)));
44878 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
44879 force_reg (HImode, GEN_INT (1 << elt))));
44880 return;
44882 else
44883 break;
44884 case V32HImode:
44885 if (TARGET_AVX512F && TARGET_AVX512BW)
44887 tmp = gen_reg_rtx (mode);
44888 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44889 gen_rtx_VEC_DUPLICATE (mode, val)));
44890 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
44891 force_reg (SImode, GEN_INT (1 << elt))));
44892 return;
44894 else
44895 break;
44896 case V64QImode:
44897 if (TARGET_AVX512F && TARGET_AVX512BW)
44899 tmp = gen_reg_rtx (mode);
44900 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44901 gen_rtx_VEC_DUPLICATE (mode, val)));
44902 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
44903 force_reg (DImode, GEN_INT (1 << elt))));
44904 return;
44906 else
44907 break;
44909 default:
44910 break;
44913 if (use_vec_merge)
44915 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
44916 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
44917 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44919 else
44921 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44923 emit_move_insn (mem, target);
44925 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
44926 emit_move_insn (tmp, val);
44928 emit_move_insn (target, mem);
44932 void
44933 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
44935 machine_mode mode = GET_MODE (vec);
44936 machine_mode inner_mode = GET_MODE_INNER (mode);
44937 bool use_vec_extr = false;
44938 rtx tmp;
44940 switch (mode)
44942 case V2SImode:
44943 case V2SFmode:
44944 if (!mmx_ok)
44945 break;
44946 /* FALLTHRU */
44948 case V2DFmode:
44949 case V2DImode:
44950 use_vec_extr = true;
44951 break;
44953 case V4SFmode:
44954 use_vec_extr = TARGET_SSE4_1;
44955 if (use_vec_extr)
44956 break;
44958 switch (elt)
44960 case 0:
44961 tmp = vec;
44962 break;
44964 case 1:
44965 case 3:
44966 tmp = gen_reg_rtx (mode);
44967 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
44968 GEN_INT (elt), GEN_INT (elt),
44969 GEN_INT (elt+4), GEN_INT (elt+4)));
44970 break;
44972 case 2:
44973 tmp = gen_reg_rtx (mode);
44974 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
44975 break;
44977 default:
44978 gcc_unreachable ();
44980 vec = tmp;
44981 use_vec_extr = true;
44982 elt = 0;
44983 break;
44985 case V4SImode:
44986 use_vec_extr = TARGET_SSE4_1;
44987 if (use_vec_extr)
44988 break;
44990 if (TARGET_SSE2)
44992 switch (elt)
44994 case 0:
44995 tmp = vec;
44996 break;
44998 case 1:
44999 case 3:
45000 tmp = gen_reg_rtx (mode);
45001 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
45002 GEN_INT (elt), GEN_INT (elt),
45003 GEN_INT (elt), GEN_INT (elt)));
45004 break;
45006 case 2:
45007 tmp = gen_reg_rtx (mode);
45008 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
45009 break;
45011 default:
45012 gcc_unreachable ();
45014 vec = tmp;
45015 use_vec_extr = true;
45016 elt = 0;
45018 else
45020 /* For SSE1, we have to reuse the V4SF code. */
45021 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
45022 gen_lowpart (V4SFmode, vec), elt);
45023 return;
45025 break;
45027 case V8HImode:
45028 use_vec_extr = TARGET_SSE2;
45029 break;
45030 case V4HImode:
45031 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45032 break;
45034 case V16QImode:
45035 use_vec_extr = TARGET_SSE4_1;
45036 break;
45038 case V8SFmode:
45039 if (TARGET_AVX)
45041 tmp = gen_reg_rtx (V4SFmode);
45042 if (elt < 4)
45043 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
45044 else
45045 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
45046 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45047 return;
45049 break;
45051 case V4DFmode:
45052 if (TARGET_AVX)
45054 tmp = gen_reg_rtx (V2DFmode);
45055 if (elt < 2)
45056 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
45057 else
45058 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
45059 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45060 return;
45062 break;
45064 case V32QImode:
45065 if (TARGET_AVX)
45067 tmp = gen_reg_rtx (V16QImode);
45068 if (elt < 16)
45069 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45070 else
45071 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45072 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45073 return;
45075 break;
45077 case V16HImode:
45078 if (TARGET_AVX)
45080 tmp = gen_reg_rtx (V8HImode);
45081 if (elt < 8)
45082 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45083 else
45084 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45085 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45086 return;
45088 break;
45090 case V8SImode:
45091 if (TARGET_AVX)
45093 tmp = gen_reg_rtx (V4SImode);
45094 if (elt < 4)
45095 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45096 else
45097 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45098 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45099 return;
45101 break;
45103 case V4DImode:
45104 if (TARGET_AVX)
45106 tmp = gen_reg_rtx (V2DImode);
45107 if (elt < 2)
45108 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45109 else
45110 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45111 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45112 return;
45114 break;
45116 case V32HImode:
45117 if (TARGET_AVX512BW)
45119 tmp = gen_reg_rtx (V16HImode);
45120 if (elt < 16)
45121 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45122 else
45123 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45124 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45125 return;
45127 break;
45129 case V64QImode:
45130 if (TARGET_AVX512BW)
45132 tmp = gen_reg_rtx (V32QImode);
45133 if (elt < 32)
45134 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45135 else
45136 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45137 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45138 return;
45140 break;
45142 case V16SFmode:
45143 tmp = gen_reg_rtx (V8SFmode);
45144 if (elt < 8)
45145 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45146 else
45147 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45148 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45149 return;
45151 case V8DFmode:
45152 tmp = gen_reg_rtx (V4DFmode);
45153 if (elt < 4)
45154 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45155 else
45156 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45157 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45158 return;
45160 case V16SImode:
45161 tmp = gen_reg_rtx (V8SImode);
45162 if (elt < 8)
45163 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45164 else
45165 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45166 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45167 return;
45169 case V8DImode:
45170 tmp = gen_reg_rtx (V4DImode);
45171 if (elt < 4)
45172 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45173 else
45174 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45175 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45176 return;
45178 case V8QImode:
45179 /* ??? Could extract the appropriate HImode element and shift. */
45180 default:
45181 break;
45184 if (use_vec_extr)
45186 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45187 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45189 /* Let the rtl optimizers know about the zero extension performed. */
45190 if (inner_mode == QImode || inner_mode == HImode)
45192 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45193 target = gen_lowpart (SImode, target);
45196 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45198 else
45200 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45202 emit_move_insn (mem, vec);
45204 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45205 emit_move_insn (target, tmp);
45209 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45210 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45211 The upper bits of DEST are undefined, though they shouldn't cause
45212 exceptions (some bits from src or all zeros are ok). */
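/* As an illustration, for a hypothetical V4SImode SRC = {a, b, c, d} a
   call with I == 128 yields DEST = {c, d, x, x} and a call with I == 64
   yields DEST = {b, x, x, x}, where x denotes a don't-care element.  */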
45214 static void
45215 emit_reduc_half (rtx dest, rtx src, int i)
45217 rtx tem, d = dest;
45218 switch (GET_MODE (src))
45220 case V4SFmode:
45221 if (i == 128)
45222 tem = gen_sse_movhlps (dest, src, src);
45223 else
45224 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45225 GEN_INT (1 + 4), GEN_INT (1 + 4));
45226 break;
45227 case V2DFmode:
45228 tem = gen_vec_interleave_highv2df (dest, src, src);
45229 break;
45230 case V16QImode:
45231 case V8HImode:
45232 case V4SImode:
45233 case V2DImode:
45234 d = gen_reg_rtx (V1TImode);
45235 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45236 GEN_INT (i / 2));
45237 break;
45238 case V8SFmode:
45239 if (i == 256)
45240 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45241 else
45242 tem = gen_avx_shufps256 (dest, src, src,
45243 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45244 break;
45245 case V4DFmode:
45246 if (i == 256)
45247 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45248 else
45249 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45250 break;
45251 case V32QImode:
45252 case V16HImode:
45253 case V8SImode:
45254 case V4DImode:
45255 if (i == 256)
45257 if (GET_MODE (dest) != V4DImode)
45258 d = gen_reg_rtx (V4DImode);
45259 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45260 gen_lowpart (V4DImode, src),
45261 const1_rtx);
45263 else
45265 d = gen_reg_rtx (V2TImode);
45266 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
45267 GEN_INT (i / 2));
45269 break;
45270 case V64QImode:
45271 case V32HImode:
45272 case V16SImode:
45273 case V16SFmode:
45274 case V8DImode:
45275 case V8DFmode:
45276 if (i > 128)
45277 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45278 gen_lowpart (V16SImode, src),
45279 gen_lowpart (V16SImode, src),
45280 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45281 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45282 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45283 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45284 GEN_INT (0xC), GEN_INT (0xD),
45285 GEN_INT (0xE), GEN_INT (0xF),
45286 GEN_INT (0x10), GEN_INT (0x11),
45287 GEN_INT (0x12), GEN_INT (0x13),
45288 GEN_INT (0x14), GEN_INT (0x15),
45289 GEN_INT (0x16), GEN_INT (0x17));
45290 else
45291 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45292 gen_lowpart (V16SImode, src),
45293 GEN_INT (i == 128 ? 0x2 : 0x1),
45294 GEN_INT (0x3),
45295 GEN_INT (0x3),
45296 GEN_INT (0x3),
45297 GEN_INT (i == 128 ? 0x6 : 0x5),
45298 GEN_INT (0x7),
45299 GEN_INT (0x7),
45300 GEN_INT (0x7),
45301 GEN_INT (i == 128 ? 0xA : 0x9),
45302 GEN_INT (0xB),
45303 GEN_INT (0xB),
45304 GEN_INT (0xB),
45305 GEN_INT (i == 128 ? 0xE : 0xD),
45306 GEN_INT (0xF),
45307 GEN_INT (0xF),
45308 GEN_INT (0xF));
45309 break;
45310 default:
45311 gcc_unreachable ();
45313 emit_insn (tem);
45314 if (d != dest)
45315 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45318 /* Expand a vector reduction. FN is the binary pattern to reduce;
45319 DEST is the destination; IN is the input vector. */
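/* As an illustration, reducing a hypothetical V4SImode vector {a, b, c, d}
   with FN = addition takes log2(4) = 2 steps:

     half = {c, d, x, x};     vec = fn (half, vec)  -> {a+c, b+d, x, x}
     half = {b+d, x, x, x};   dst = fn (half, vec)  -> {a+b+c+d, x, x, x}

   so the scalar result ends up in element 0 of DEST; the remaining
   elements are don't-care.  */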
45321 void
45322 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45324 rtx half, dst, vec = in;
45325 machine_mode mode = GET_MODE (in);
45326 int i;
45328 /* SSE4.1 has a special instruction for V8HImode UMIN reduction. */
45329 if (TARGET_SSE4_1
45330 && mode == V8HImode
45331 && fn == gen_uminv8hi3)
45333 emit_insn (gen_sse4_1_phminposuw (dest, in));
45334 return;
45337 for (i = GET_MODE_BITSIZE (mode);
45338 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45339 i >>= 1)
45341 half = gen_reg_rtx (mode);
45342 emit_reduc_half (half, vec, i);
45343 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45344 dst = dest;
45345 else
45346 dst = gen_reg_rtx (mode);
45347 emit_insn (fn (dst, half, vec));
45348 vec = dst;
45352 /* Target hook for scalar_mode_supported_p. */
45353 static bool
45354 ix86_scalar_mode_supported_p (machine_mode mode)
45356 if (DECIMAL_FLOAT_MODE_P (mode))
45357 return default_decimal_float_supported_p ();
45358 else if (mode == TFmode)
45359 return true;
45360 else
45361 return default_scalar_mode_supported_p (mode);
45364 /* Implements target hook vector_mode_supported_p. */
45365 static bool
45366 ix86_vector_mode_supported_p (machine_mode mode)
45368 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45369 return true;
45370 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45371 return true;
45372 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45373 return true;
45374 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45375 return true;
45376 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45377 return true;
45378 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45379 return true;
45380 return false;
45383 /* Implement target hook libgcc_floating_mode_supported_p. */
45384 static bool
45385 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45387 switch (mode)
45389 case SFmode:
45390 case DFmode:
45391 case XFmode:
45392 return true;
45394 case TFmode:
45395 #ifdef IX86_NO_LIBGCC_TFMODE
45396 return false;
45397 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45398 return TARGET_LONG_DOUBLE_128;
45399 #else
45400 return true;
45401 #endif
45403 default:
45404 return false;
45408 /* Target hook for c_mode_for_suffix. */
45409 static machine_mode
45410 ix86_c_mode_for_suffix (char suffix)
45412 if (suffix == 'q')
45413 return TFmode;
45414 if (suffix == 'w')
45415 return XFmode;
45417 return VOIDmode;
45420 /* Worker function for TARGET_MD_ASM_CLOBBERS.
45422 We do this in the new i386 backend to maintain source compatibility
45423 with the old cc0-based compiler. */
45425 static tree
45426 ix86_md_asm_clobbers (tree, tree, tree clobbers)
45428 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
45429 clobbers);
45430 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
45431 clobbers);
45432 return clobbers;
45435 /* Implements target vector targetm.asm.encode_section_info. */
45437 static void ATTRIBUTE_UNUSED
45438 ix86_encode_section_info (tree decl, rtx rtl, int first)
45440 default_encode_section_info (decl, rtl, first);
45442 if (ix86_in_large_data_p (decl))
45443 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45446 /* Worker function for REVERSE_CONDITION. */
45448 enum rtx_code
45449 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45451 return (mode != CCFPmode && mode != CCFPUmode
45452 ? reverse_condition (code)
45453 : reverse_condition_maybe_unordered (code));
45456 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45457 to OPERANDS[0]. */
45459 const char *
45460 output_387_reg_move (rtx insn, rtx *operands)
45462 if (REG_P (operands[0]))
45464 if (REG_P (operands[1])
45465 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45467 if (REGNO (operands[0]) == FIRST_STACK_REG)
45468 return output_387_ffreep (operands, 0);
45469 return "fstp\t%y0";
45471 if (STACK_TOP_P (operands[0]))
45472 return "fld%Z1\t%y1";
45473 return "fst\t%y0";
45475 else if (MEM_P (operands[0]))
45477 gcc_assert (REG_P (operands[1]));
45478 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45479 return "fstp%Z0\t%y0";
45480 else
45482 /* There is no non-popping store to memory for XFmode.
45483 So if we need one, follow the store with a load. */
45484 if (GET_MODE (operands[0]) == XFmode)
45485 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45486 else
45487 return "fst%Z0\t%y0";
45490 else
45491 gcc_unreachable ();
45494 /* Output code to perform a conditional jump to LABEL, if C2 flag in
45495 FP status register is set. */
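/* (Both strategies below inspect the status word stored by fnstsw: with
   SAHF its high byte is copied into EFLAGS and the jump is taken on the
   unordered condition; otherwise the 0x04 bit of that high byte, which
   corresponds to C2, is tested directly.  Sketch of the intent only.)  */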
45497 void
45498 ix86_emit_fp_unordered_jump (rtx label)
45500 rtx reg = gen_reg_rtx (HImode);
45501 rtx temp;
45503 emit_insn (gen_x86_fnstsw_1 (reg));
45505 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45507 emit_insn (gen_x86_sahf_1 (reg));
45509 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45510 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45512 else
45514 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45516 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45517 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45520 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45521 gen_rtx_LABEL_REF (VOIDmode, label),
45522 pc_rtx);
45523 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
45525 emit_jump_insn (temp);
45526 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45529 /* Output code to perform a log1p XFmode calculation. */
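/* (Sketch of the approach: fyl2xp1 computes y * log2 (x + 1) but is only
   specified for small magnitudes, so it is used, with y = ln (2), when
   |op1| < 1 - sqrt (2) / 2 ~= 0.2928932...; larger inputs fall back to
   fyl2x applied to 1 + op1.)  */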
45531 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45533 rtx_code_label *label1 = gen_label_rtx ();
45534 rtx_code_label *label2 = gen_label_rtx ();
45536 rtx tmp = gen_reg_rtx (XFmode);
45537 rtx tmp2 = gen_reg_rtx (XFmode);
45538 rtx test;
45540 emit_insn (gen_absxf2 (tmp, op1));
45541 test = gen_rtx_GE (VOIDmode, tmp,
45542 CONST_DOUBLE_FROM_REAL_VALUE (
45543 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45544 XFmode));
45545 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
45547 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45548 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45549 emit_jump (label2);
45551 emit_label (label1);
45552 emit_move_insn (tmp, CONST1_RTX (XFmode));
45553 emit_insn (gen_addxf3 (tmp, op1, tmp));
45554 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45555 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45557 emit_label (label2);
45560 /* Emit code for round calculation. */
45561 void ix86_emit_i387_round (rtx op0, rtx op1)
45563 machine_mode inmode = GET_MODE (op1);
45564 machine_mode outmode = GET_MODE (op0);
45565 rtx e1, e2, res, tmp, tmp1, half;
45566 rtx scratch = gen_reg_rtx (HImode);
45567 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45568 rtx_code_label *jump_label = gen_label_rtx ();
45569 rtx insn;
45570 rtx (*gen_abs) (rtx, rtx);
45571 rtx (*gen_neg) (rtx, rtx);
45573 switch (inmode)
45575 case SFmode:
45576 gen_abs = gen_abssf2;
45577 break;
45578 case DFmode:
45579 gen_abs = gen_absdf2;
45580 break;
45581 case XFmode:
45582 gen_abs = gen_absxf2;
45583 break;
45584 default:
45585 gcc_unreachable ();
45588 switch (outmode)
45590 case SFmode:
45591 gen_neg = gen_negsf2;
45592 break;
45593 case DFmode:
45594 gen_neg = gen_negdf2;
45595 break;
45596 case XFmode:
45597 gen_neg = gen_negxf2;
45598 break;
45599 case HImode:
45600 gen_neg = gen_neghi2;
45601 break;
45602 case SImode:
45603 gen_neg = gen_negsi2;
45604 break;
45605 case DImode:
45606 gen_neg = gen_negdi2;
45607 break;
45608 default:
45609 gcc_unreachable ();
45612 e1 = gen_reg_rtx (inmode);
45613 e2 = gen_reg_rtx (inmode);
45614 res = gen_reg_rtx (outmode);
45616 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45618 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
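/* E.g. round (2.3) = floor (2.8) = 2, round (2.5) = floor (3.0) = 3 and
   round (-2.5) = -floor (3.0) = -3, i.e. halfway cases are rounded away
   from zero.  */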
45620 /* scratch = fxam(op1) */
45621 emit_insn (gen_rtx_SET (VOIDmode, scratch,
45622 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45623 UNSPEC_FXAM)));
45624 /* e1 = fabs(op1) */
45625 emit_insn (gen_abs (e1, op1));
45627 /* e2 = e1 + 0.5 */
45628 half = force_reg (inmode, half);
45629 emit_insn (gen_rtx_SET (VOIDmode, e2,
45630 gen_rtx_PLUS (inmode, e1, half)));
45632 /* res = floor(e2) */
45633 if (inmode != XFmode)
45635 tmp1 = gen_reg_rtx (XFmode);
45637 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
45638 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
45640 else
45641 tmp1 = e2;
45643 switch (outmode)
45645 case SFmode:
45646 case DFmode:
45648 rtx tmp0 = gen_reg_rtx (XFmode);
45650 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
45652 emit_insn (gen_rtx_SET (VOIDmode, res,
45653 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
45654 UNSPEC_TRUNC_NOOP)));
45656 break;
45657 case XFmode:
45658 emit_insn (gen_frndintxf2_floor (res, tmp1));
45659 break;
45660 case HImode:
45661 emit_insn (gen_lfloorxfhi2 (res, tmp1));
45662 break;
45663 case SImode:
45664 emit_insn (gen_lfloorxfsi2 (res, tmp1));
45665 break;
45666 case DImode:
45667 emit_insn (gen_lfloorxfdi2 (res, tmp1));
45668 break;
45669 default:
45670 gcc_unreachable ();
45673 /* flags = signbit(a) */
45674 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
45676 /* if (flags) then res = -res */
45677 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
45678 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
45679 gen_rtx_LABEL_REF (VOIDmode, jump_label),
45680 pc_rtx);
45681 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45682 predict_jump (REG_BR_PROB_BASE * 50 / 100);
45683 JUMP_LABEL (insn) = jump_label;
45685 emit_insn (gen_neg (res, res));
45687 emit_label (jump_label);
45688 LABEL_NUSES (jump_label) = 1;
45690 emit_move_insn (op0, res);
45693 /* Output code to perform a Newton-Raphson approximation of a single precision
45694 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
45696 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
45698 rtx x0, x1, e0, e1;
45700 x0 = gen_reg_rtx (mode);
45701 e0 = gen_reg_rtx (mode);
45702 e1 = gen_reg_rtx (mode);
45703 x1 = gen_reg_rtx (mode);
45705 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
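/* (Derivation sketch: with x0 = rcp (b) as an estimate of 1/b, one
   Newton-Raphson step for f(x) = 1/x - b gives
     x1 = x0 * (2 - b * x0) = (x0 + x0) - (b * x0 * x0),
   which is exactly the e0/e1/x1 sequence emitted below; the quotient is
   then approximated as a * x1.)  */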
45707 b = force_reg (mode, b);
45709 /* x0 = rcp(b) estimate */
45710 if (mode == V16SFmode || mode == V8DFmode)
45711 emit_insn (gen_rtx_SET (VOIDmode, x0,
45712 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45713 UNSPEC_RCP14)));
45714 else
45715 emit_insn (gen_rtx_SET (VOIDmode, x0,
45716 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45717 UNSPEC_RCP)));
45719 /* e0 = x0 * b */
45720 emit_insn (gen_rtx_SET (VOIDmode, e0,
45721 gen_rtx_MULT (mode, x0, b)));
45723 /* e0 = x0 * e0 */
45724 emit_insn (gen_rtx_SET (VOIDmode, e0,
45725 gen_rtx_MULT (mode, x0, e0)));
45727 /* e1 = x0 + x0 */
45728 emit_insn (gen_rtx_SET (VOIDmode, e1,
45729 gen_rtx_PLUS (mode, x0, x0)));
45731 /* x1 = e1 - e0 */
45732 emit_insn (gen_rtx_SET (VOIDmode, x1,
45733 gen_rtx_MINUS (mode, e1, e0)));
45735 /* res = a * x1 */
45736 emit_insn (gen_rtx_SET (VOIDmode, res,
45737 gen_rtx_MULT (mode, a, x1)));
45740 /* Output code to perform a Newton-Raphson approximation of a
45741 single precision floating point [reciprocal] square root. */
45743 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
45744 bool recip)
45746 rtx x0, e0, e1, e2, e3, mthree, mhalf;
45747 REAL_VALUE_TYPE r;
45748 int unspec;
45750 x0 = gen_reg_rtx (mode);
45751 e0 = gen_reg_rtx (mode);
45752 e1 = gen_reg_rtx (mode);
45753 e2 = gen_reg_rtx (mode);
45754 e3 = gen_reg_rtx (mode);
45756 real_from_integer (&r, VOIDmode, -3, SIGNED);
45757 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45759 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45760 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45761 unspec = UNSPEC_RSQRT;
45763 if (VECTOR_MODE_P (mode))
45765 mthree = ix86_build_const_vector (mode, true, mthree);
45766 mhalf = ix86_build_const_vector (mode, true, mhalf);
45767 /* There is no 512-bit rsqrt. There is however rsqrt14. */
45768 if (GET_MODE_SIZE (mode) == 64)
45769 unspec = UNSPEC_RSQRT14;
45772 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45773 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
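/* (Derivation sketch: with x0 = rsqrtss (a) as an estimate of 1/sqrt (a),
   one Newton-Raphson step for f(x) = 1/(x*x) - a gives
     x1 = x0 * (3 - a * x0 * x0) / 2 = -0.5 * x0 * (a * x0 * x0 - 3.0),
   which is the rsqrt formula above; multiplying through by a, i.e. using
   e0 = a * x0 in place of the final x0, yields the sqrt formula.)  */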
45775 a = force_reg (mode, a);
45777 /* x0 = rsqrt(a) estimate */
45778 emit_insn (gen_rtx_SET (VOIDmode, x0,
45779 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45780 unspec)));
45782 /* If a == 0.0, zero out the rsqrt estimate (which is infinity) so that sqrt (0.0) does not produce a NaN. */
45783 if (!recip)
45785 rtx zero, mask;
45787 zero = gen_reg_rtx (mode);
45788 mask = gen_reg_rtx (mode);
45790 zero = force_reg (mode, CONST0_RTX(mode));
45792 /* Handle masked compare. */
45793 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
45795 mask = gen_reg_rtx (HImode);
45796 /* Imm value 0x4 corresponds to not-equal comparison. */
45797 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45798 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
45800 else
45802 emit_insn (gen_rtx_SET (VOIDmode, mask,
45803 gen_rtx_NE (mode, zero, a)));
45805 emit_insn (gen_rtx_SET (VOIDmode, x0,
45806 gen_rtx_AND (mode, x0, mask)));
45810 /* e0 = x0 * a */
45811 emit_insn (gen_rtx_SET (VOIDmode, e0,
45812 gen_rtx_MULT (mode, x0, a)));
45813 /* e1 = e0 * x0 */
45814 emit_insn (gen_rtx_SET (VOIDmode, e1,
45815 gen_rtx_MULT (mode, e0, x0)));
45817 /* e2 = e1 - 3. */
45818 mthree = force_reg (mode, mthree);
45819 emit_insn (gen_rtx_SET (VOIDmode, e2,
45820 gen_rtx_PLUS (mode, e1, mthree)));
45822 mhalf = force_reg (mode, mhalf);
45823 if (recip)
45824 /* e3 = -.5 * x0 */
45825 emit_insn (gen_rtx_SET (VOIDmode, e3,
45826 gen_rtx_MULT (mode, x0, mhalf)));
45827 else
45828 /* e3 = -.5 * e0 */
45829 emit_insn (gen_rtx_SET (VOIDmode, e3,
45830 gen_rtx_MULT (mode, e0, mhalf)));
45831 /* ret = e2 * e3 */
45832 emit_insn (gen_rtx_SET (VOIDmode, res,
45833 gen_rtx_MULT (mode, e2, e3)));
45836 #ifdef TARGET_SOLARIS
45837 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
45839 static void
45840 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45841 tree decl)
45843 /* With Binutils 2.15, the "@unwind" marker must be specified on
45844 every occurrence of the ".eh_frame" section, not just the first
45845 one. */
45846 if (TARGET_64BIT
45847 && strcmp (name, ".eh_frame") == 0)
45849 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45850 flags & SECTION_WRITE ? "aw" : "a");
45851 return;
45854 #ifndef USE_GAS
45855 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45857 solaris_elf_asm_comdat_section (name, flags, decl);
45858 return;
45860 #endif
45862 default_elf_asm_named_section (name, flags, decl);
45864 #endif /* TARGET_SOLARIS */
45866 /* Return the mangling of TYPE if it is an extended fundamental type. */
45868 static const char *
45869 ix86_mangle_type (const_tree type)
45871 type = TYPE_MAIN_VARIANT (type);
45873 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45874 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45875 return NULL;
45877 switch (TYPE_MODE (type))
45879 case TFmode:
45880 /* __float128 is "g". */
45881 return "g";
45882 case XFmode:
45883 /* "long double" or __float80 is "e". */
45884 return "e";
45885 default:
45886 return NULL;
45890 /* For 32-bit code we can save PIC register setup by using
45891 the __stack_chk_fail_local hidden function instead of calling
45892 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
45893 register, so it is better to call __stack_chk_fail directly. */
45895 static tree ATTRIBUTE_UNUSED
45896 ix86_stack_protect_fail (void)
45898 return TARGET_64BIT
45899 ? default_external_stack_protect_fail ()
45900 : default_hidden_stack_protect_fail ();
45903 /* Select a format to encode pointers in exception handling data. CODE
45904 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
45905 true if the symbol may be affected by dynamic relocations.
45907 ??? All x86 object file formats are capable of representing this.
45908 After all, the relocation needed is the same as for the call insn.
45909 Whether or not a particular assembler allows us to enter such, I
45910 guess we'll have to see. */
45912 asm_preferred_eh_data_format (int code, int global)
45914 if (flag_pic)
45916 int type = DW_EH_PE_sdata8;
45917 if (!TARGET_64BIT
45918 || ix86_cmodel == CM_SMALL_PIC
45919 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
45920 type = DW_EH_PE_sdata4;
45921 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
45923 if (ix86_cmodel == CM_SMALL
45924 || (ix86_cmodel == CM_MEDIUM && code))
45925 return DW_EH_PE_udata4;
45926 return DW_EH_PE_absptr;
45929 /* Expand copysign from SIGN to the positive value ABS_VALUE
45930 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
45931 the sign-bit. */
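/* (In effect the sequence below computes
     RESULT = ABS_VALUE | (SIGN & sign-bit mask),
   ORing the sign bit of SIGN into the assumed-nonnegative ABS_VALUE; a
   MASK passed by the caller is the fabs mask, i.e. all bits except the
   sign bit, and is therefore inverted first.)  */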
45932 static void
45933 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
45935 machine_mode mode = GET_MODE (sign);
45936 rtx sgn = gen_reg_rtx (mode);
45937 if (mask == NULL_RTX)
45939 machine_mode vmode;
45941 if (mode == SFmode)
45942 vmode = V4SFmode;
45943 else if (mode == DFmode)
45944 vmode = V2DFmode;
45945 else
45946 vmode = mode;
45948 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
45949 if (!VECTOR_MODE_P (mode))
45951 /* We need to generate a scalar mode mask in this case. */
45952 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45953 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45954 mask = gen_reg_rtx (mode);
45955 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
45958 else
45959 mask = gen_rtx_NOT (mode, mask);
45960 emit_insn (gen_rtx_SET (VOIDmode, sgn,
45961 gen_rtx_AND (mode, mask, sign)));
45962 emit_insn (gen_rtx_SET (VOIDmode, result,
45963 gen_rtx_IOR (mode, abs_value, sgn)));
45966 /* Expand fabs (OP0) and return a new rtx that holds the result. The
45967 mask for masking out the sign-bit is stored in *SMASK, if that is
45968 non-null. */
45969 static rtx
45970 ix86_expand_sse_fabs (rtx op0, rtx *smask)
45972 machine_mode vmode, mode = GET_MODE (op0);
45973 rtx xa, mask;
45975 xa = gen_reg_rtx (mode);
45976 if (mode == SFmode)
45977 vmode = V4SFmode;
45978 else if (mode == DFmode)
45979 vmode = V2DFmode;
45980 else
45981 vmode = mode;
45982 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
45983 if (!VECTOR_MODE_P (mode))
45985 /* We need to generate a scalar mode mask in this case. */
45986 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45987 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45988 mask = gen_reg_rtx (mode);
45989 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
45991 emit_insn (gen_rtx_SET (VOIDmode, xa,
45992 gen_rtx_AND (mode, op0, mask)));
45994 if (smask)
45995 *smask = mask;
45997 return xa;
46000 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
46001 swapping the operands if SWAP_OPERANDS is true. The expanded
46002 code is a forward jump to a newly created label in case the
46003 comparison is true. The generated label rtx is returned. */
46004 static rtx_code_label *
46005 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
46006 bool swap_operands)
46008 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
46009 rtx_code_label *label;
46010 rtx tmp;
46012 if (swap_operands)
46013 std::swap (op0, op1);
46015 label = gen_label_rtx ();
46016 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
46017 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46018 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
46019 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
46020 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
46021 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
46022 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
46023 JUMP_LABEL (tmp) = label;
46025 return label;
46028 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
46029 using comparison code CODE. Operands are swapped for the comparison if
46030 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
46031 static rtx
46032 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
46033 bool swap_operands)
46035 rtx (*insn)(rtx, rtx, rtx, rtx);
46036 machine_mode mode = GET_MODE (op0);
46037 rtx mask = gen_reg_rtx (mode);
46039 if (swap_operands)
46040 std::swap (op0, op1);
46042 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
46044 emit_insn (insn (mask, op0, op1,
46045 gen_rtx_fmt_ee (code, mode, op0, op1)));
46046 return mask;
46049 /* Generate and return a rtx of mode MODE for 2**n where n is the number
46050 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
46051 static rtx
46052 ix86_gen_TWO52 (machine_mode mode)
46054 REAL_VALUE_TYPE TWO52r;
46055 rtx TWO52;
46057 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
46058 TWO52 = const_double_from_real_value (TWO52r, mode);
46059 TWO52 = force_reg (mode, TWO52);
46061 return TWO52;
46064 /* Expand SSE sequence for computing lround from OP1 storing
46065 into OP0. */
46066 void
46067 ix86_expand_lround (rtx op0, rtx op1)
46069 /* C code for the stuff we're doing below:
46070 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
46071 return (long)tmp;
46073 machine_mode mode = GET_MODE (op1);
46074 const struct real_format *fmt;
46075 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46076 rtx adj;
46078 /* load nextafter (0.5, 0.0) */
46079 fmt = REAL_MODE_FORMAT (mode);
46080 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46081 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
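/* (pred_half is 0.5 - 2**(-p-1), the largest representable value below
   0.5; presumably it is used instead of 0.5 itself so that inputs just
   below 0.5 are not rounded up to 1 by the addition that follows.)  */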
46083 /* adj = copysign (0.5, op1) */
46084 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46085 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46087 /* adj = op1 + adj */
46088 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
46090 /* op0 = (imode)adj */
46091 expand_fix (op0, adj, 0);
46094 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
46095 into OPERAND0. */
46096 void
46097 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46099 /* C code for the stuff we're doing below (for do_floor):
46100 xi = (long)op1;
46101 xi -= (double)xi > op1 ? 1 : 0;
46102 return xi;
46104 machine_mode fmode = GET_MODE (op1);
46105 machine_mode imode = GET_MODE (op0);
46106 rtx ireg, freg, tmp;
46107 rtx_code_label *label;
46109 /* reg = (long)op1 */
46110 ireg = gen_reg_rtx (imode);
46111 expand_fix (ireg, op1, 0);
46113 /* freg = (double)reg */
46114 freg = gen_reg_rtx (fmode);
46115 expand_float (freg, ireg, 0);
46117 /* ireg = (freg > op1) ? ireg - 1 : ireg */
46118 label = ix86_expand_sse_compare_and_jump (UNLE,
46119 freg, op1, !do_floor);
46120 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46121 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46122 emit_move_insn (ireg, tmp);
46124 emit_label (label);
46125 LABEL_NUSES (label) = 1;
46127 emit_move_insn (op0, ireg);
46130 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46131 result in OPERAND0. */
46132 void
46133 ix86_expand_rint (rtx operand0, rtx operand1)
46135 /* C code for the stuff we're doing below:
46136 xa = fabs (operand1);
46137 if (!isless (xa, 2**52))
46138 return operand1;
46139 xa = xa + 2**52 - 2**52;
46140 return copysign (xa, operand1);
46142 machine_mode mode = GET_MODE (operand0);
46143 rtx res, xa, TWO52, mask;
46144 rtx_code_label *label;
46146 res = gen_reg_rtx (mode);
46147 emit_move_insn (res, operand1);
46149 /* xa = abs (operand1) */
46150 xa = ix86_expand_sse_fabs (res, &mask);
46152 /* if (!isless (xa, TWO52)) goto label; */
46153 TWO52 = ix86_gen_TWO52 (mode);
46154 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46156 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46157 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
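/* (For |xa| < TWO52 the sum xa + TWO52 has no representable fraction
   bits, so the addition itself rounds xa to an integer in the current
   rounding mode, and subtracting TWO52 then recovers that integer
   exactly.)  */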
46159 ix86_sse_copysign_to_positive (res, xa, res, mask);
46161 emit_label (label);
46162 LABEL_NUSES (label) = 1;
46164 emit_move_insn (operand0, res);
46167 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46168 into OPERAND0. */
46169 void
46170 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46172 /* C code for the stuff we expand below.
46173 double xa = fabs (x), x2;
46174 if (!isless (xa, TWO52))
46175 return x;
46176 xa = xa + TWO52 - TWO52;
46177 x2 = copysign (xa, x);
46178 Compensate. Floor:
46179 if (x2 > x)
46180 x2 -= 1;
46181 Compensate. Ceil:
46182 if (x2 < x)
46183 x2 -= -1;
46184 return x2;
46186 machine_mode mode = GET_MODE (operand0);
46187 rtx xa, TWO52, tmp, one, res, mask;
46188 rtx_code_label *label;
46190 TWO52 = ix86_gen_TWO52 (mode);
46192 /* Temporary for holding the result, initialized to the input
46193 operand to ease control flow. */
46194 res = gen_reg_rtx (mode);
46195 emit_move_insn (res, operand1);
46197 /* xa = abs (operand1) */
46198 xa = ix86_expand_sse_fabs (res, &mask);
46200 /* if (!isless (xa, TWO52)) goto label; */
46201 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46203 /* xa = xa + TWO52 - TWO52; */
46204 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46205 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46207 /* xa = copysign (xa, operand1) */
46208 ix86_sse_copysign_to_positive (xa, xa, res, mask);
46210 /* generate 1.0 or -1.0 */
46211 one = force_reg (mode,
46212 const_double_from_real_value (do_floor
46213 ? dconst1 : dconstm1, mode));
46215 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46216 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46217 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46218 gen_rtx_AND (mode, one, tmp)));
46219 /* We always need to subtract here to preserve signed zero. */
46220 tmp = expand_simple_binop (mode, MINUS,
46221 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46222 emit_move_insn (res, tmp);
46224 emit_label (label);
46225 LABEL_NUSES (label) = 1;
46227 emit_move_insn (operand0, res);
46230 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46231 into OPERAND0. */
46232 void
46233 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46235 /* C code for the stuff we expand below.
46236 double xa = fabs (x), x2;
46237 if (!isless (xa, TWO52))
46238 return x;
46239 x2 = (double)(long)x;
46240 Compensate. Floor:
46241 if (x2 > x)
46242 x2 -= 1;
46243 Compensate. Ceil:
46244 if (x2 < x)
46245 x2 += 1;
46246 if (HONOR_SIGNED_ZEROS (mode))
46247 return copysign (x2, x);
46248 return x2;
46250 machine_mode mode = GET_MODE (operand0);
46251 rtx xa, xi, TWO52, tmp, one, res, mask;
46252 rtx_code_label *label;
46254 TWO52 = ix86_gen_TWO52 (mode);
46256 /* Temporary for holding the result, initialized to the input
46257 operand to ease control flow. */
46258 res = gen_reg_rtx (mode);
46259 emit_move_insn (res, operand1);
46261 /* xa = abs (operand1) */
46262 xa = ix86_expand_sse_fabs (res, &mask);
46264 /* if (!isless (xa, TWO52)) goto label; */
46265 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46267 /* xa = (double)(long)x */
46268 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46269 expand_fix (xi, res, 0);
46270 expand_float (xa, xi, 0);
46272 /* generate 1.0 */
46273 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46275 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46276 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46277 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46278 gen_rtx_AND (mode, one, tmp)));
46279 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46280 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46281 emit_move_insn (res, tmp);
46283 if (HONOR_SIGNED_ZEROS (mode))
46284 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46286 emit_label (label);
46287 LABEL_NUSES (label) = 1;
46289 emit_move_insn (operand0, res);
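/* A rough scalar equivalent of the truncate-and-compensate sequence above
   (illustrative sketch only; floor shown, ceil adds 1 instead of
   subtracting; the helper name is made up):

     double floor_sketch (double x)
     {
       double xa = fabs (x);
       if (!(xa < 0x1p52))
         return x;                            /* already integral */
       double x2 = (double) (long long) x;    /* truncate toward zero */
       if (x2 > x)                            /* truncation went the wrong way */
         x2 -= 1.0;
       return x2;                             /* signed zero handled separately */
     }
*/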
46292 /* Expand SSE sequence for computing round from OPERAND1 storing
46293 into OPERAND0. Sequence that works without relying on DImode truncation
46294 via cvttsd2siq, which is only available on 64-bit targets. */
46295 void
46296 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46298 /* C code for the stuff we expand below.
46299 double xa = fabs (x), xa2, x2;
46300 if (!isless (xa, TWO52))
46301 return x;
46302 Using the absolute value and copying back sign makes
46303 -0.0 -> -0.0 correct.
46304 xa2 = xa + TWO52 - TWO52;
46305 Compensate.
46306 dxa = xa2 - xa;
46307 if (dxa <= -0.5)
46308 xa2 += 1;
46309 else if (dxa > 0.5)
46310 xa2 -= 1;
46311 x2 = copysign (xa2, x);
46312 return x2;
46314 machine_mode mode = GET_MODE (operand0);
46315 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46316 rtx_code_label *label;
46318 TWO52 = ix86_gen_TWO52 (mode);
46320 /* Temporary for holding the result, initialized to the input
46321 operand to ease control flow. */
46322 res = gen_reg_rtx (mode);
46323 emit_move_insn (res, operand1);
46325 /* xa = abs (operand1) */
46326 xa = ix86_expand_sse_fabs (res, &mask);
46328 /* if (!isless (xa, TWO52)) goto label; */
46329 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46331 /* xa2 = xa + TWO52 - TWO52; */
46332 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46333 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46335 /* dxa = xa2 - xa; */
46336 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46338 /* generate 0.5, 1.0 and -0.5 */
46339 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46340 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46341 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46342 0, OPTAB_DIRECT);
46344 /* Compensate. */
46345 tmp = gen_reg_rtx (mode);
46346 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46347 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46348 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46349 gen_rtx_AND (mode, one, tmp)));
46350 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46351 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46352 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46353 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46354 gen_rtx_AND (mode, one, tmp)));
46355 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46357 /* res = copysign (xa2, operand1) */
46358 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46360 emit_label (label);
46361 LABEL_NUSES (label) = 1;
46363 emit_move_insn (operand0, res);
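/* A plain-C sketch of the compensation above (illustrative only): the point
   is that xa + 2**52 - 2**52 rounds to nearest with ties to even, while
   round() must round halfway cases away from zero, so the rounding error
   dxa is inspected and a correction of +/-1 is applied:

     double xa2 = xa + 0x1p52 - 0x1p52;   /* nearest integer, ties to even */
     double dxa = xa2 - xa;               /* signed rounding error */
     if (dxa > 0.5)
       xa2 -= 1.0;
     else if (dxa <= -0.5)
       xa2 += 1.0;                        /* e.g. xa = 2.5 -> xa2 = 2 -> 3 */
     /* finally: result = copysign (xa2, x) */
*/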
46366 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46367 into OPERAND0. */
46368 void
46369 ix86_expand_trunc (rtx operand0, rtx operand1)
46371 /* C code for SSE variant we expand below.
46372 double xa = fabs (x), x2;
46373 if (!isless (xa, TWO52))
46374 return x;
46375 x2 = (double)(long)x;
46376 if (HONOR_SIGNED_ZEROS (mode))
46377 return copysign (x2, x);
46378 return x2;
46380 machine_mode mode = GET_MODE (operand0);
46381 rtx xa, xi, TWO52, res, mask;
46382 rtx_code_label *label;
46384 TWO52 = ix86_gen_TWO52 (mode);
46386 /* Temporary for holding the result, initialized to the input
46387 operand to ease control flow. */
46388 res = gen_reg_rtx (mode);
46389 emit_move_insn (res, operand1);
46391 /* xa = abs (operand1) */
46392 xa = ix86_expand_sse_fabs (res, &mask);
46394 /* if (!isless (xa, TWO52)) goto label; */
46395 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46397 /* x = (double)(long)x */
46398 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46399 expand_fix (xi, res, 0);
46400 expand_float (res, xi, 0);
46402 if (HONOR_SIGNED_ZEROS (mode))
46403 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46405 emit_label (label);
46406 LABEL_NUSES (label) = 1;
46408 emit_move_insn (operand0, res);
46411 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46412 into OPERAND0. */
46413 void
46414 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46416 machine_mode mode = GET_MODE (operand0);
46417 rtx xa, mask, TWO52, one, res, smask, tmp;
46418 rtx_code_label *label;
46420 /* C code for SSE variant we expand below.
46421 double xa = fabs (x), x2;
46422 if (!isless (xa, TWO52))
46423 return x;
46424 xa2 = xa + TWO52 - TWO52;
46425 Compensate:
46426 if (xa2 > xa)
46427 xa2 -= 1.0;
46428 x2 = copysign (xa2, x);
46429 return x2;
46432 TWO52 = ix86_gen_TWO52 (mode);
46434 /* Temporary for holding the result, initialized to the input
46435 operand to ease control flow. */
46436 res = gen_reg_rtx (mode);
46437 emit_move_insn (res, operand1);
46439 /* xa = abs (operand1) */
46440 xa = ix86_expand_sse_fabs (res, &smask);
46442 /* if (!isless (xa, TWO52)) goto label; */
46443 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46445 /* res = xa + TWO52 - TWO52; */
46446 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46447 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46448 emit_move_insn (res, tmp);
46450 /* generate 1.0 */
46451 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46453 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46454 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46455 emit_insn (gen_rtx_SET (VOIDmode, mask,
46456 gen_rtx_AND (mode, mask, one)));
46457 tmp = expand_simple_binop (mode, MINUS,
46458 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46459 emit_move_insn (res, tmp);
46461 /* res = copysign (res, operand1) */
46462 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46464 emit_label (label);
46465 LABEL_NUSES (label) = 1;
46467 emit_move_insn (operand0, res);
46470 /* Expand SSE sequence for computing round from OPERAND1 storing
46471 into OPERAND0. */
46472 void
46473 ix86_expand_round (rtx operand0, rtx operand1)
46475 /* C code for the stuff we're doing below:
46476 double xa = fabs (x);
46477 if (!isless (xa, TWO52))
46478 return x;
46479 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46480 return copysign (xa, x);
46482 machine_mode mode = GET_MODE (operand0);
46483 rtx res, TWO52, xa, xi, half, mask;
46484 rtx_code_label *label;
46485 const struct real_format *fmt;
46486 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46488 /* Temporary for holding the result, initialized to the input
46489 operand to ease control flow. */
46490 res = gen_reg_rtx (mode);
46491 emit_move_insn (res, operand1);
46493 TWO52 = ix86_gen_TWO52 (mode);
46494 xa = ix86_expand_sse_fabs (res, &mask);
46495 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46497 /* load nextafter (0.5, 0.0) */
46498 fmt = REAL_MODE_FORMAT (mode);
46499 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46500 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46502 /* xa = xa + 0.5 */
46503 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46504 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46506 /* xa = (double)(int64_t)xa */
46507 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46508 expand_fix (xi, xa, 0);
46509 expand_float (xa, xi, 0);
46511 /* res = copysign (xa, operand1) */
46512 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46514 emit_label (label);
46515 LABEL_NUSES (label) = 1;
46517 emit_move_insn (operand0, res);
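/* Why the sequence adds nextafter (0.5, 0.0) rather than 0.5 (illustrative
   note, using double as the example): for the largest double below 0.5,
   x = 0.49999999999999994, x + 0.5 rounds up to exactly 1.0, so
   trunc (x + 0.5) would yield 1 even though round (x) must be 0.  Adding
   the predecessor of 0.5 keeps the sum strictly below 1.0 for every
   x < 0.5, while round (0.5) itself still yields 1.  */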
46520 /* Expand SSE sequence for computing round
46521 from OP1 storing into OP0 using sse4 round insn. */
46522 void
46523 ix86_expand_round_sse4 (rtx op0, rtx op1)
46525 machine_mode mode = GET_MODE (op0);
46526 rtx e1, e2, res, half;
46527 const struct real_format *fmt;
46528 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46529 rtx (*gen_copysign) (rtx, rtx, rtx);
46530 rtx (*gen_round) (rtx, rtx, rtx);
46532 switch (mode)
46534 case SFmode:
46535 gen_copysign = gen_copysignsf3;
46536 gen_round = gen_sse4_1_roundsf2;
46537 break;
46538 case DFmode:
46539 gen_copysign = gen_copysigndf3;
46540 gen_round = gen_sse4_1_rounddf2;
46541 break;
46542 default:
46543 gcc_unreachable ();
46546 /* round (a) = trunc (a + copysign (0.5, a)) */
46548 /* load nextafter (0.5, 0.0) */
46549 fmt = REAL_MODE_FORMAT (mode);
46550 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46551 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46552 half = const_double_from_real_value (pred_half, mode);
46554 /* e1 = copysign (0.5, op1) */
46555 e1 = gen_reg_rtx (mode);
46556 emit_insn (gen_copysign (e1, half, op1));
46558 /* e2 = op1 + e1 */
46559 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46561 /* res = trunc (e2) */
46562 res = gen_reg_rtx (mode);
46563 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46565 emit_move_insn (op0, res);
46569 /* Table of valid machine attributes. */
46570 static const struct attribute_spec ix86_attribute_table[] =
46572 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46573 affects_type_identity } */
46574 /* Stdcall attribute says callee is responsible for popping arguments
46575 if they are not variable. */
46576 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46577 true },
46578 /* Fastcall attribute says callee is responsible for popping arguments
46579 if they are not variable. */
46580 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46581 true },
46582 /* Thiscall attribute says callee is responsible for popping arguments
46583 if they are not variable. */
46584 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46585 true },
46586 /* Cdecl attribute says the callee is a normal C declaration. */
46587 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46588 true },
46589 /* Regparm attribute specifies how many integer arguments are to be
46590 passed in registers. */
46591 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
46592 true },
46593 /* Sseregparm attribute says we are using x86_64 calling conventions
46594 for FP arguments. */
46595 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46596 true },
46597 /* The transactional memory builtins are implicitly regparm or fastcall
46598 depending on the ABI. Override the generic do-nothing attribute that
46599 these builtins were declared with. */
46600 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46601 true },
46602 /* force_align_arg_pointer says this function realigns the stack at entry. */
46603 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46604 false, true, true, ix86_handle_cconv_attribute, false },
46605 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46606 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46607 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46608 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
46609 false },
46610 #endif
46611 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46612 false },
46613 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46614 false },
46615 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46616 SUBTARGET_ATTRIBUTE_TABLE,
46617 #endif
46618 /* ms_abi and sysv_abi calling convention function attributes. */
46619 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46620 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46621 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46622 false },
46623 { "callee_pop_aggregate_return", 1, 1, false, true, true,
46624 ix86_handle_callee_pop_aggregate_return, true },
46625 /* End element. */
46626 { NULL, 0, 0, false, false, false, NULL, false }
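/* Example of how a few of the attributes above appear in user code
   (illustrative only; the identifiers are made up):

     int __attribute__ ((regparm (3))) add3 (int a, int b, int c);
     int __attribute__ ((fastcall)) fc (int a, int b);
     void __attribute__ ((ms_abi)) win64_callback (void *p);
     struct __attribute__ ((ms_struct)) S { char c; int i; };
*/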
46629 /* Implement targetm.vectorize.builtin_vectorization_cost. */
46630 static int
46631 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46632 tree vectype, int)
46634 unsigned elements;
46636 switch (type_of_cost)
46638 case scalar_stmt:
46639 return ix86_cost->scalar_stmt_cost;
46641 case scalar_load:
46642 return ix86_cost->scalar_load_cost;
46644 case scalar_store:
46645 return ix86_cost->scalar_store_cost;
46647 case vector_stmt:
46648 return ix86_cost->vec_stmt_cost;
46650 case vector_load:
46651 return ix86_cost->vec_align_load_cost;
46653 case vector_store:
46654 return ix86_cost->vec_store_cost;
46656 case vec_to_scalar:
46657 return ix86_cost->vec_to_scalar_cost;
46659 case scalar_to_vec:
46660 return ix86_cost->scalar_to_vec_cost;
46662 case unaligned_load:
46663 case unaligned_store:
46664 return ix86_cost->vec_unalign_load_cost;
46666 case cond_branch_taken:
46667 return ix86_cost->cond_taken_branch_cost;
46669 case cond_branch_not_taken:
46670 return ix86_cost->cond_not_taken_branch_cost;
46672 case vec_perm:
46673 case vec_promote_demote:
46674 return ix86_cost->vec_stmt_cost;
46676 case vec_construct:
46677 elements = TYPE_VECTOR_SUBPARTS (vectype);
46678 return ix86_cost->vec_stmt_cost * (elements / 2 + 1);
46680 default:
46681 gcc_unreachable ();
46685 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46686 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46687 insn every time. */
46689 static GTY(()) rtx_insn *vselect_insn;
46691 /* Initialize vselect_insn. */
46693 static void
46694 init_vselect_insn (void)
46696 unsigned i;
46697 rtx x;
46699 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46700 for (i = 0; i < MAX_VECT_LEN; ++i)
46701 XVECEXP (x, 0, i) = const0_rtx;
46702 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46703 const0_rtx), x);
46704 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
46705 start_sequence ();
46706 vselect_insn = emit_insn (x);
46707 end_sequence ();
46710 /* Construct (set target (vec_select op0 (parallel perm))) and
46711 return true if that's a valid instruction in the active ISA. */
46713 static bool
46714 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46715 unsigned nelt, bool testing_p)
46717 unsigned int i;
46718 rtx x, save_vconcat;
46719 int icode;
46721 if (vselect_insn == NULL_RTX)
46722 init_vselect_insn ();
46724 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46725 PUT_NUM_ELEM (XVEC (x, 0), nelt);
46726 for (i = 0; i < nelt; ++i)
46727 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46728 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46729 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46730 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46731 SET_DEST (PATTERN (vselect_insn)) = target;
46732 icode = recog_memoized (vselect_insn);
46734 if (icode >= 0 && !testing_p)
46735 emit_insn (copy_rtx (PATTERN (vselect_insn)));
46737 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46738 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46739 INSN_CODE (vselect_insn) = -1;
46741 return icode >= 0;
46744 /* Similar, but generate a vec_concat from op0 and op1 as well. */
46746 static bool
46747 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46748 const unsigned char *perm, unsigned nelt,
46749 bool testing_p)
46751 machine_mode v2mode;
46752 rtx x;
46753 bool ok;
46755 if (vselect_insn == NULL_RTX)
46756 init_vselect_insn ();
46758 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
46759 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46760 PUT_MODE (x, v2mode);
46761 XEXP (x, 0) = op0;
46762 XEXP (x, 1) = op1;
46763 ok = expand_vselect (target, x, perm, nelt, testing_p);
46764 XEXP (x, 0) = const0_rtx;
46765 XEXP (x, 1) = const0_rtx;
46766 return ok;
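/* For illustration, the RTL shape reused by the two helpers above, shown
   for a concrete V4SImode case (sketch only): selecting elements
   {0, 4, 1, 5} from the concatenation of op0 and op1, i.e. an
   interleave-low, is

     (set (reg:V4SI target)
          (vec_select:V4SI
            (vec_concat:V8SI (reg:V4SI op0) (reg:V4SI op1))
            (parallel [(const_int 0) (const_int 4)
                       (const_int 1) (const_int 5)])))

   recog_memoized then decides whether some insn pattern matches it.  */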
46769 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46770 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
46772 static bool
46773 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46775 machine_mode vmode = d->vmode;
46776 unsigned i, mask, nelt = d->nelt;
46777 rtx target, op0, op1, x;
46778 rtx rperm[32], vperm;
46780 if (d->one_operand_p)
46781 return false;
46782 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46783 && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
46785 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46787 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46789 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46791 else
46792 return false;
46794 /* This is a blend, not a permute. Elements must stay in their
46795 respective lanes. */
46796 for (i = 0; i < nelt; ++i)
46798 unsigned e = d->perm[i];
46799 if (!(e == i || e == i + nelt))
46800 return false;
46803 if (d->testing_p)
46804 return true;
46806 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
46807 decision should be extracted elsewhere, so that we only try that
46808 sequence once all budget==3 options have been tried. */
46809 target = d->target;
46810 op0 = d->op0;
46811 op1 = d->op1;
46812 mask = 0;
46814 switch (vmode)
46816 case V8DFmode:
46817 case V16SFmode:
46818 case V4DFmode:
46819 case V8SFmode:
46820 case V2DFmode:
46821 case V4SFmode:
46822 case V8HImode:
46823 case V8SImode:
46824 case V32HImode:
46825 case V64QImode:
46826 case V16SImode:
46827 case V8DImode:
46828 for (i = 0; i < nelt; ++i)
46829 mask |= (d->perm[i] >= nelt) << i;
46830 break;
46832 case V2DImode:
46833 for (i = 0; i < 2; ++i)
46834 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
46835 vmode = V8HImode;
46836 goto do_subreg;
46838 case V4SImode:
46839 for (i = 0; i < 4; ++i)
46840 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46841 vmode = V8HImode;
46842 goto do_subreg;
46844 case V16QImode:
46845 /* See if bytes move in pairs so we can use pblendw with
46846 an immediate argument, rather than pblendvb with a vector
46847 argument. */
46848 for (i = 0; i < 16; i += 2)
46849 if (d->perm[i] + 1 != d->perm[i + 1])
46851 use_pblendvb:
46852 for (i = 0; i < nelt; ++i)
46853 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46855 finish_pblendvb:
46856 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46857 vperm = force_reg (vmode, vperm);
46859 if (GET_MODE_SIZE (vmode) == 16)
46860 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46861 else
46862 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
46863 if (target != d->target)
46864 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46865 return true;
46868 for (i = 0; i < 8; ++i)
46869 mask |= (d->perm[i * 2] >= 16) << i;
46870 vmode = V8HImode;
46871 /* FALLTHRU */
46873 do_subreg:
46874 target = gen_reg_rtx (vmode);
46875 op0 = gen_lowpart (vmode, op0);
46876 op1 = gen_lowpart (vmode, op1);
46877 break;
46879 case V32QImode:
46880 /* See if bytes move in pairs. If not, vpblendvb must be used. */
46881 for (i = 0; i < 32; i += 2)
46882 if (d->perm[i] + 1 != d->perm[i + 1])
46883 goto use_pblendvb;
46884 /* See if bytes move in quadruplets. If yes, vpblendd
46885 with immediate can be used. */
46886 for (i = 0; i < 32; i += 4)
46887 if (d->perm[i] + 2 != d->perm[i + 2])
46888 break;
46889 if (i < 32)
46891 /* See if bytes move the same in both lanes. If yes,
46892 vpblendw with immediate can be used. */
46893 for (i = 0; i < 16; i += 2)
46894 if (d->perm[i] + 16 != d->perm[i + 16])
46895 goto use_pblendvb;
46897 /* Use vpblendw. */
46898 for (i = 0; i < 16; ++i)
46899 mask |= (d->perm[i * 2] >= 32) << i;
46900 vmode = V16HImode;
46901 goto do_subreg;
46904 /* Use vpblendd. */
46905 for (i = 0; i < 8; ++i)
46906 mask |= (d->perm[i * 4] >= 32) << i;
46907 vmode = V8SImode;
46908 goto do_subreg;
46910 case V16HImode:
46911 /* See if words move in pairs. If yes, vpblendd can be used. */
46912 for (i = 0; i < 16; i += 2)
46913 if (d->perm[i] + 1 != d->perm[i + 1])
46914 break;
46915 if (i < 16)
46917 /* See if words move the same in both lanes. If not,
46918 vpblendvb must be used. */
46919 for (i = 0; i < 8; i++)
46920 if (d->perm[i] + 8 != d->perm[i + 8])
46922 /* Use vpblendvb. */
46923 for (i = 0; i < 32; ++i)
46924 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
46926 vmode = V32QImode;
46927 nelt = 32;
46928 target = gen_reg_rtx (vmode);
46929 op0 = gen_lowpart (vmode, op0);
46930 op1 = gen_lowpart (vmode, op1);
46931 goto finish_pblendvb;
46934 /* Use vpblendw. */
46935 for (i = 0; i < 16; ++i)
46936 mask |= (d->perm[i] >= 16) << i;
46937 break;
46940 /* Use vpblendd. */
46941 for (i = 0; i < 8; ++i)
46942 mask |= (d->perm[i * 2] >= 16) << i;
46943 vmode = V8SImode;
46944 goto do_subreg;
46946 case V4DImode:
46947 /* Use vpblendd. */
46948 for (i = 0; i < 4; ++i)
46949 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46950 vmode = V8SImode;
46951 goto do_subreg;
46953 default:
46954 gcc_unreachable ();
46957 /* This matches five different patterns with the different modes. */
46958 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
46959 x = gen_rtx_SET (VOIDmode, target, x);
46960 emit_insn (x);
46961 if (target != d->target)
46962 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46964 return true;
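/* A worked example of the mask construction above (illustrative only):
   for V8HImode with perm = {0, 9, 2, 11, 4, 13, 6, 15}, elements 1, 3, 5
   and 7 come from op1 (perm[i] >= nelt), so mask = 0xaa and the emitted
   vec_merge corresponds to pblendw $0xaa.  For V4SImode the permutation is
   first rewritten as a V8HImode blend, each dword element contributing two
   adjacent mask bits.  */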
46967 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46968 in terms of the variable form of vpermilps.
46970 Note that we will have already failed the immediate input vpermilps,
46971 which requires that the high and low part shuffle be identical; the
46972 variable form doesn't require that. */
46974 static bool
46975 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
46977 rtx rperm[8], vperm;
46978 unsigned i;
46980 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
46981 return false;
46983 /* We can only permute within the 128-bit lane. */
46984 for (i = 0; i < 8; ++i)
46986 unsigned e = d->perm[i];
46987 if (i < 4 ? e >= 4 : e < 4)
46988 return false;
46991 if (d->testing_p)
46992 return true;
46994 for (i = 0; i < 8; ++i)
46996 unsigned e = d->perm[i];
46998 /* Within each 128-bit lane, the elements of op0 are numbered
46999 from 0 and the elements of op1 are numbered from 4. */
47000 if (e >= 8 + 4)
47001 e -= 8;
47002 else if (e >= 4)
47003 e -= 4;
47005 rperm[i] = GEN_INT (e);
47008 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
47009 vperm = force_reg (V8SImode, vperm);
47010 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
47012 return true;
47015 /* Return true if permutation D can be performed as VMODE permutation
47016 instead. */
47018 static bool
47019 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
47021 unsigned int i, j, chunk;
47023 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
47024 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
47025 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
47026 return false;
47028 if (GET_MODE_NUNITS (vmode) >= d->nelt)
47029 return true;
47031 chunk = d->nelt / GET_MODE_NUNITS (vmode);
47032 for (i = 0; i < d->nelt; i += chunk)
47033 if (d->perm[i] & (chunk - 1))
47034 return false;
47035 else
47036 for (j = 1; j < chunk; ++j)
47037 if (d->perm[i] + j != d->perm[i + j])
47038 return false;
47040 return true;
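/* A worked example for the check above (illustrative only): a V16QImode
   permutation beginning {4, 5, 6, 7, 0, 1, 2, 3, ...} moves bytes in
   aligned groups of chunk = 16/4 = 4, so it is also expressible as the
   V4SImode permutation {1, 0, ...}; a permutation beginning {5, 6, 7, 8,
   ...} is rejected because perm[0] & (chunk - 1) != 0.  */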
47043 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47044 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
47046 static bool
47047 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
47049 unsigned i, nelt, eltsz, mask;
47050 unsigned char perm[64];
47051 machine_mode vmode = V16QImode;
47052 rtx rperm[64], vperm, target, op0, op1;
47054 nelt = d->nelt;
47056 if (!d->one_operand_p)
47058 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
47060 if (TARGET_AVX2
47061 && valid_perm_using_mode_p (V2TImode, d))
47063 if (d->testing_p)
47064 return true;
47066 /* Use vperm2i128 insn. The pattern uses
47067 V4DImode instead of V2TImode. */
47068 target = d->target;
47069 if (d->vmode != V4DImode)
47070 target = gen_reg_rtx (V4DImode);
47071 op0 = gen_lowpart (V4DImode, d->op0);
47072 op1 = gen_lowpart (V4DImode, d->op1);
47073 rperm[0]
47074 = GEN_INT ((d->perm[0] / (nelt / 2))
47075 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
47076 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47077 if (target != d->target)
47078 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47079 return true;
47081 return false;
47084 else
47086 if (GET_MODE_SIZE (d->vmode) == 16)
47088 if (!TARGET_SSSE3)
47089 return false;
47091 else if (GET_MODE_SIZE (d->vmode) == 32)
47093 if (!TARGET_AVX2)
47094 return false;
47096 /* V4DImode should already have been handled through
47097 expand_vselect via the vpermq instruction. */
47098 gcc_assert (d->vmode != V4DImode);
47100 vmode = V32QImode;
47101 if (d->vmode == V8SImode
47102 || d->vmode == V16HImode
47103 || d->vmode == V32QImode)
47105 /* First see if vpermq can be used for
47106 V8SImode/V16HImode/V32QImode. */
47107 if (valid_perm_using_mode_p (V4DImode, d))
47109 for (i = 0; i < 4; i++)
47110 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47111 if (d->testing_p)
47112 return true;
47113 target = gen_reg_rtx (V4DImode);
47114 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47115 perm, 4, false))
47117 emit_move_insn (d->target,
47118 gen_lowpart (d->vmode, target));
47119 return true;
47121 return false;
47124 /* Next see if vpermd can be used. */
47125 if (valid_perm_using_mode_p (V8SImode, d))
47126 vmode = V8SImode;
47128 /* Or if vpermps can be used. */
47129 else if (d->vmode == V8SFmode)
47130 vmode = V8SImode;
47132 if (vmode == V32QImode)
47134 /* vpshufb only works intra-lane; it is not
47135 possible to shuffle bytes between the lanes. */
47136 for (i = 0; i < nelt; ++i)
47137 if ((d->perm[i] ^ i) & (nelt / 2))
47138 return false;
47141 else if (GET_MODE_SIZE (d->vmode) == 64)
47143 if (!TARGET_AVX512BW)
47144 return false;
47146 /* If vpermq didn't work, vpshufb won't work either. */
47147 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47148 return false;
47150 vmode = V64QImode;
47151 if (d->vmode == V16SImode
47152 || d->vmode == V32HImode
47153 || d->vmode == V64QImode)
47155 /* First see if vpermq can be used for
47156 V16SImode/V32HImode/V64QImode. */
47157 if (valid_perm_using_mode_p (V8DImode, d))
47159 for (i = 0; i < 8; i++)
47160 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47161 if (d->testing_p)
47162 return true;
47163 target = gen_reg_rtx (V8DImode);
47164 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47165 perm, 8, false))
47167 emit_move_insn (d->target,
47168 gen_lowpart (d->vmode, target));
47169 return true;
47171 return false;
47174 /* Next see if vpermd can be used. */
47175 if (valid_perm_using_mode_p (V16SImode, d))
47176 vmode = V16SImode;
47178 /* Or if vpermps can be used. */
47179 else if (d->vmode == V16SFmode)
47180 vmode = V16SImode;
47181 if (vmode == V64QImode)
47183 /* vpshufb only works intra-lane; it is not
47184 possible to shuffle bytes between the lanes. */
47185 for (i = 0; i < nelt; ++i)
47186 if ((d->perm[i] ^ i) & (nelt / 4))
47187 return false;
47190 else
47191 return false;
47194 if (d->testing_p)
47195 return true;
47197 if (vmode == V8SImode)
47198 for (i = 0; i < 8; ++i)
47199 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47200 else if (vmode == V16SImode)
47201 for (i = 0; i < 16; ++i)
47202 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47203 else
47205 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47206 if (!d->one_operand_p)
47207 mask = 2 * nelt - 1;
47208 else if (vmode == V16QImode)
47209 mask = nelt - 1;
47210 else if (vmode == V64QImode)
47211 mask = nelt / 4 - 1;
47212 else
47213 mask = nelt / 2 - 1;
47215 for (i = 0; i < nelt; ++i)
47217 unsigned j, e = d->perm[i] & mask;
47218 for (j = 0; j < eltsz; ++j)
47219 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47223 vperm = gen_rtx_CONST_VECTOR (vmode,
47224 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47225 vperm = force_reg (vmode, vperm);
47227 target = d->target;
47228 if (d->vmode != vmode)
47229 target = gen_reg_rtx (vmode);
47230 op0 = gen_lowpart (vmode, d->op0);
47231 if (d->one_operand_p)
47233 if (vmode == V16QImode)
47234 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47235 else if (vmode == V32QImode)
47236 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47237 else if (vmode == V64QImode)
47238 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47239 else if (vmode == V8SFmode)
47240 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47241 else if (vmode == V8SImode)
47242 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47243 else if (vmode == V16SFmode)
47244 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47245 else if (vmode == V16SImode)
47246 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47247 else
47248 gcc_unreachable ();
47250 else
47252 op1 = gen_lowpart (vmode, d->op1);
47253 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47255 if (target != d->target)
47256 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47258 return true;
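/* A worked example of the selector built above (illustrative only): for a
   one-operand V4SImode permutation {3, 1, 2, 0} handled via pshufb,
   eltsz = 4, so each dword index e expands to the four byte indices
   e*4 .. e*4+3 and the V16QImode selector becomes
   {12,13,14,15, 4,5,6,7, 8,9,10,11, 0,1,2,3}.  */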
47261 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47262 in a single instruction. */
47264 static bool
47265 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47267 unsigned i, nelt = d->nelt;
47268 unsigned char perm2[MAX_VECT_LEN];
47270 /* Check plain VEC_SELECT first, because AVX has instructions that could
47271 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47272 input where SEL+CONCAT may not. */
47273 if (d->one_operand_p)
47275 int mask = nelt - 1;
47276 bool identity_perm = true;
47277 bool broadcast_perm = true;
47279 for (i = 0; i < nelt; i++)
47281 perm2[i] = d->perm[i] & mask;
47282 if (perm2[i] != i)
47283 identity_perm = false;
47284 if (perm2[i])
47285 broadcast_perm = false;
47288 if (identity_perm)
47290 if (!d->testing_p)
47291 emit_move_insn (d->target, d->op0);
47292 return true;
47294 else if (broadcast_perm && TARGET_AVX2)
47296 /* Use vpbroadcast{b,w,d}. */
47297 rtx (*gen) (rtx, rtx) = NULL;
47298 switch (d->vmode)
47300 case V64QImode:
47301 if (TARGET_AVX512BW)
47302 gen = gen_avx512bw_vec_dupv64qi_1;
47303 break;
47304 case V32QImode:
47305 gen = gen_avx2_pbroadcastv32qi_1;
47306 break;
47307 case V32HImode:
47308 if (TARGET_AVX512BW)
47309 gen = gen_avx512bw_vec_dupv32hi_1;
47310 break;
47311 case V16HImode:
47312 gen = gen_avx2_pbroadcastv16hi_1;
47313 break;
47314 case V16SImode:
47315 if (TARGET_AVX512F)
47316 gen = gen_avx512f_vec_dupv16si_1;
47317 break;
47318 case V8SImode:
47319 gen = gen_avx2_pbroadcastv8si_1;
47320 break;
47321 case V16QImode:
47322 gen = gen_avx2_pbroadcastv16qi;
47323 break;
47324 case V8HImode:
47325 gen = gen_avx2_pbroadcastv8hi;
47326 break;
47327 case V16SFmode:
47328 if (TARGET_AVX512F)
47329 gen = gen_avx512f_vec_dupv16sf_1;
47330 break;
47331 case V8SFmode:
47332 gen = gen_avx2_vec_dupv8sf_1;
47333 break;
47334 case V8DFmode:
47335 if (TARGET_AVX512F)
47336 gen = gen_avx512f_vec_dupv8df_1;
47337 break;
47338 case V8DImode:
47339 if (TARGET_AVX512F)
47340 gen = gen_avx512f_vec_dupv8di_1;
47341 break;
47342 /* For other modes prefer other shuffles this function creates. */
47343 default: break;
47345 if (gen != NULL)
47347 if (!d->testing_p)
47348 emit_insn (gen (d->target, d->op0));
47349 return true;
47353 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47354 return true;
47356 /* There are plenty of patterns in sse.md that are written for
47357 SEL+CONCAT and are not replicated for a single op. Perhaps
47358 that should be changed, to avoid the nastiness here. */
47360 /* Recognize interleave style patterns, which means incrementing
47361 every other permutation operand. */
47362 for (i = 0; i < nelt; i += 2)
47364 perm2[i] = d->perm[i] & mask;
47365 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47367 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47368 d->testing_p))
47369 return true;
47371 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47372 if (nelt >= 4)
47374 for (i = 0; i < nelt; i += 4)
47376 perm2[i + 0] = d->perm[i + 0] & mask;
47377 perm2[i + 1] = d->perm[i + 1] & mask;
47378 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47379 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47382 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47383 d->testing_p))
47384 return true;
47388 /* Finally, try the fully general two operand permute. */
47389 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47390 d->testing_p))
47391 return true;
47393 /* Recognize interleave style patterns with reversed operands. */
47394 if (!d->one_operand_p)
47396 for (i = 0; i < nelt; ++i)
47398 unsigned e = d->perm[i];
47399 if (e >= nelt)
47400 e -= nelt;
47401 else
47402 e += nelt;
47403 perm2[i] = e;
47406 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47407 d->testing_p))
47408 return true;
47411 /* Try the SSE4.1 blend variable merge instructions. */
47412 if (expand_vec_perm_blend (d))
47413 return true;
47415 /* Try one of the AVX vpermil variable permutations. */
47416 if (expand_vec_perm_vpermil (d))
47417 return true;
47419 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47420 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47421 if (expand_vec_perm_pshufb (d))
47422 return true;
47424 /* Try the AVX2 vpalignr instruction. */
47425 if (expand_vec_perm_palignr (d, true))
47426 return true;
47428 /* Try the AVX512F vpermi2 instructions. */
47429 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47430 return true;
47432 return false;
47435 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47436 in terms of a pair of pshuflw + pshufhw instructions. */
47438 static bool
47439 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47441 unsigned char perm2[MAX_VECT_LEN];
47442 unsigned i;
47443 bool ok;
47445 if (d->vmode != V8HImode || !d->one_operand_p)
47446 return false;
47448 /* The two permutations only operate in 64-bit lanes. */
47449 for (i = 0; i < 4; ++i)
47450 if (d->perm[i] >= 4)
47451 return false;
47452 for (i = 4; i < 8; ++i)
47453 if (d->perm[i] < 4)
47454 return false;
47456 if (d->testing_p)
47457 return true;
47459 /* Emit the pshuflw. */
47460 memcpy (perm2, d->perm, 4);
47461 for (i = 4; i < 8; ++i)
47462 perm2[i] = i;
47463 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47464 gcc_assert (ok);
47466 /* Emit the pshufhw. */
47467 memcpy (perm2 + 4, d->perm + 4, 4);
47468 for (i = 0; i < 4; ++i)
47469 perm2[i] = i;
47470 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47471 gcc_assert (ok);
47473 return true;
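/* A worked example for the split above (illustrative only): the V8HImode
   permutation {2, 0, 3, 1, 4, 6, 5, 7} keeps the low four and high four
   words within their own 64-bit halves, so it is emitted as
   pshuflw with {2, 0, 3, 1, 4, 5, 6, 7} followed by
   pshufhw with {0, 1, 2, 3, 4, 6, 5, 7} on the intermediate result.  */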
47476 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47477 the permutation using the SSSE3 palignr instruction. This succeeds
47478 when all of the elements in PERM fit within one vector and we merely
47479 need to shift them down so that a single vector permutation has a
47480 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
47481 the vpalignr instruction by itself can perform the requested permutation. */
47483 static bool
47484 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47486 unsigned i, nelt = d->nelt;
47487 unsigned min, max, minswap, maxswap;
47488 bool in_order, ok, swap = false;
47489 rtx shift, target;
47490 struct expand_vec_perm_d dcopy;
47492 /* Even with AVX, palignr only operates on 128-bit vectors;
47493 with AVX2, palignr operates on both 128-bit lanes. */
47494 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47495 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47496 return false;
47498 min = 2 * nelt;
47499 max = 0;
47500 minswap = 2 * nelt;
47501 maxswap = 0;
47502 for (i = 0; i < nelt; ++i)
47504 unsigned e = d->perm[i];
47505 unsigned eswap = d->perm[i] ^ nelt;
47506 if (GET_MODE_SIZE (d->vmode) == 32)
47508 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47509 eswap = e ^ (nelt / 2);
47511 if (e < min)
47512 min = e;
47513 if (e > max)
47514 max = e;
47515 if (eswap < minswap)
47516 minswap = eswap;
47517 if (eswap > maxswap)
47518 maxswap = eswap;
47520 if (min == 0
47521 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47523 if (d->one_operand_p
47524 || minswap == 0
47525 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47526 ? nelt / 2 : nelt))
47527 return false;
47528 swap = true;
47529 min = minswap;
47530 max = maxswap;
47533 /* Given that we have SSSE3, we know we'll be able to implement the
47534 single operand permutation after the palignr with pshufb for
47535 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47536 first. */
47537 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47538 return true;
47540 dcopy = *d;
47541 if (swap)
47543 dcopy.op0 = d->op1;
47544 dcopy.op1 = d->op0;
47545 for (i = 0; i < nelt; ++i)
47546 dcopy.perm[i] ^= nelt;
47549 in_order = true;
47550 for (i = 0; i < nelt; ++i)
47552 unsigned e = dcopy.perm[i];
47553 if (GET_MODE_SIZE (d->vmode) == 32
47554 && e >= nelt
47555 && (e & (nelt / 2 - 1)) < min)
47556 e = e - min - (nelt / 2);
47557 else
47558 e = e - min;
47559 if (e != i)
47560 in_order = false;
47561 dcopy.perm[i] = e;
47563 dcopy.one_operand_p = true;
47565 if (single_insn_only_p && !in_order)
47566 return false;
47568 /* For AVX2, test whether we can permute the result in one instruction. */
47569 if (d->testing_p)
47571 if (in_order)
47572 return true;
47573 dcopy.op1 = dcopy.op0;
47574 return expand_vec_perm_1 (&dcopy);
47577 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47578 if (GET_MODE_SIZE (d->vmode) == 16)
47580 target = gen_reg_rtx (TImode);
47581 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47582 gen_lowpart (TImode, dcopy.op0), shift));
47584 else
47586 target = gen_reg_rtx (V2TImode);
47587 emit_insn (gen_avx2_palignrv2ti (target,
47588 gen_lowpart (V2TImode, dcopy.op1),
47589 gen_lowpart (V2TImode, dcopy.op0),
47590 shift));
47593 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47595 /* Test for the degenerate case where the alignment by itself
47596 produces the desired permutation. */
47597 if (in_order)
47599 emit_move_insn (d->target, dcopy.op0);
47600 return true;
47603 ok = expand_vec_perm_1 (&dcopy);
47604 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47606 return ok;
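/* A worked example of the simplification above (illustrative only): for a
   two-operand V16QImode permutation {3, 4, ..., 18} (min = 3, all indices
   within one 16-element window), palignr $3 on the concatenation op1:op0
   produces a vector holding elements 3..18, after which the remaining
   permutation is either the identity (in_order) or a cheap one-operand
   pshufb.  */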
47609 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47610 the permutation using the SSE4_1 pblendv instruction. Potentially
47611 reduces the permutation from 2 pshufb insns plus an or to 1 pshufb plus a pblendv. */
47613 static bool
47614 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47616 unsigned i, which, nelt = d->nelt;
47617 struct expand_vec_perm_d dcopy, dcopy1;
47618 machine_mode vmode = d->vmode;
47619 bool ok;
47621 /* Use the same checks as in expand_vec_perm_blend. */
47622 if (d->one_operand_p)
47623 return false;
47624 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47626 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47628 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47630 else
47631 return false;
47633 /* Figure out which permutation elements do not stay in their
47634 respective lanes. */
47635 for (i = 0, which = 0; i < nelt; ++i)
47637 unsigned e = d->perm[i];
47638 if (e != i)
47639 which |= (e < nelt ? 1 : 2);
47641 /* We can pblend the part where elements do not stay in their
47642 respective lanes only when these elements all come from the
47643 same half of the permutation.
47644 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not at their respective
47645 lanes, but both 8 and 9 >= 8;
47646 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not at their
47647 respective lanes, and 8 >= 8 but 2 is not. */
47648 if (which != 1 && which != 2)
47649 return false;
47650 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47651 return true;
47653 /* First we apply a one-operand permutation to the part whose
47654 elements do not stay in their respective lanes. */
47655 dcopy = *d;
47656 if (which == 2)
47657 dcopy.op0 = dcopy.op1 = d->op1;
47658 else
47659 dcopy.op0 = dcopy.op1 = d->op0;
47660 if (!d->testing_p)
47661 dcopy.target = gen_reg_rtx (vmode);
47662 dcopy.one_operand_p = true;
47664 for (i = 0; i < nelt; ++i)
47665 dcopy.perm[i] = d->perm[i] & (nelt - 1);
47667 ok = expand_vec_perm_1 (&dcopy);
47668 if (GET_MODE_SIZE (vmode) != 16 && !ok)
47669 return false;
47670 else
47671 gcc_assert (ok);
47672 if (d->testing_p)
47673 return true;
47675 /* Next we put permuted elements into their positions. */
47676 dcopy1 = *d;
47677 if (which == 2)
47678 dcopy1.op1 = dcopy.target;
47679 else
47680 dcopy1.op0 = dcopy.target;
47682 for (i = 0; i < nelt; ++i)
47683 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47685 ok = expand_vec_perm_blend (&dcopy1);
47686 gcc_assert (ok);
47688 return true;
47691 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47693 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47694 a two vector permutation into a single vector permutation by using
47695 an interleave operation to merge the vectors. */
47697 static bool
47698 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47700 struct expand_vec_perm_d dremap, dfinal;
47701 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47702 unsigned HOST_WIDE_INT contents;
47703 unsigned char remap[2 * MAX_VECT_LEN];
47704 rtx_insn *seq;
47705 bool ok, same_halves = false;
47707 if (GET_MODE_SIZE (d->vmode) == 16)
47709 if (d->one_operand_p)
47710 return false;
47712 else if (GET_MODE_SIZE (d->vmode) == 32)
47714 if (!TARGET_AVX)
47715 return false;
47716 /* For 32-byte modes allow even d->one_operand_p.
47717 The lack of cross-lane shuffling in some instructions
47718 might prevent a single insn shuffle. */
47719 dfinal = *d;
47720 dfinal.testing_p = true;
47721 /* If expand_vec_perm_interleave3 can expand this into
47722 a 3-insn sequence, give up and let it be expanded that
47723 way. While that is one insn longer, it doesn't need a
47724 memory operand, and in the common case where both the
47725 interleave-low and interleave-high permutations with the
47726 same operands are adjacent, it needs only 4 insns
47727 for both after CSE. */
47728 if (expand_vec_perm_interleave3 (&dfinal))
47729 return false;
47731 else
47732 return false;
47734 /* Examine from whence the elements come. */
47735 contents = 0;
47736 for (i = 0; i < nelt; ++i)
47737 contents |= HOST_WIDE_INT_1U << d->perm[i];
47739 memset (remap, 0xff, sizeof (remap));
47740 dremap = *d;
47742 if (GET_MODE_SIZE (d->vmode) == 16)
47744 unsigned HOST_WIDE_INT h1, h2, h3, h4;
47746 /* Split the two input vectors into 4 halves. */
47747 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
47748 h2 = h1 << nelt2;
47749 h3 = h2 << nelt2;
47750 h4 = h3 << nelt2;
47752 /* If the elements all come from the low halves, use interleave low;
47753 similarly for interleave high. If the elements are from mis-matched
47754 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
47755 if ((contents & (h1 | h3)) == contents)
47757 /* punpckl* */
47758 for (i = 0; i < nelt2; ++i)
47760 remap[i] = i * 2;
47761 remap[i + nelt] = i * 2 + 1;
47762 dremap.perm[i * 2] = i;
47763 dremap.perm[i * 2 + 1] = i + nelt;
47765 if (!TARGET_SSE2 && d->vmode == V4SImode)
47766 dremap.vmode = V4SFmode;
47768 else if ((contents & (h2 | h4)) == contents)
47770 /* punpckh* */
47771 for (i = 0; i < nelt2; ++i)
47773 remap[i + nelt2] = i * 2;
47774 remap[i + nelt + nelt2] = i * 2 + 1;
47775 dremap.perm[i * 2] = i + nelt2;
47776 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47778 if (!TARGET_SSE2 && d->vmode == V4SImode)
47779 dremap.vmode = V4SFmode;
47781 else if ((contents & (h1 | h4)) == contents)
47783 /* shufps */
47784 for (i = 0; i < nelt2; ++i)
47786 remap[i] = i;
47787 remap[i + nelt + nelt2] = i + nelt2;
47788 dremap.perm[i] = i;
47789 dremap.perm[i + nelt2] = i + nelt + nelt2;
47791 if (nelt != 4)
47793 /* shufpd */
47794 dremap.vmode = V2DImode;
47795 dremap.nelt = 2;
47796 dremap.perm[0] = 0;
47797 dremap.perm[1] = 3;
47800 else if ((contents & (h2 | h3)) == contents)
47802 /* shufps */
47803 for (i = 0; i < nelt2; ++i)
47805 remap[i + nelt2] = i;
47806 remap[i + nelt] = i + nelt2;
47807 dremap.perm[i] = i + nelt2;
47808 dremap.perm[i + nelt2] = i + nelt;
47810 if (nelt != 4)
47812 /* shufpd */
47813 dremap.vmode = V2DImode;
47814 dremap.nelt = 2;
47815 dremap.perm[0] = 1;
47816 dremap.perm[1] = 2;
47819 else
47820 return false;
47822 else
47824 unsigned int nelt4 = nelt / 4, nzcnt = 0;
47825 unsigned HOST_WIDE_INT q[8];
47826 unsigned int nonzero_halves[4];
47828 /* Split the two input vectors into 8 quarters. */
47829 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
47830 for (i = 1; i < 8; ++i)
47831 q[i] = q[0] << (nelt4 * i);
47832 for (i = 0; i < 4; ++i)
47833 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47835 nonzero_halves[nzcnt] = i;
47836 ++nzcnt;
47839 if (nzcnt == 1)
47841 gcc_assert (d->one_operand_p);
47842 nonzero_halves[1] = nonzero_halves[0];
47843 same_halves = true;
47845 else if (d->one_operand_p)
47847 gcc_assert (nonzero_halves[0] == 0);
47848 gcc_assert (nonzero_halves[1] == 1);
47851 if (nzcnt <= 2)
47853 if (d->perm[0] / nelt2 == nonzero_halves[1])
47855 /* Attempt to increase the likelihood that the dfinal
47856 shuffle will be intra-lane. */
47857 char tmph = nonzero_halves[0];
47858 nonzero_halves[0] = nonzero_halves[1];
47859 nonzero_halves[1] = tmph;
47862 /* vperm2f128 or vperm2i128. */
47863 for (i = 0; i < nelt2; ++i)
47865 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47866 remap[i + nonzero_halves[0] * nelt2] = i;
47867 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
47868 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
47871 if (d->vmode != V8SFmode
47872 && d->vmode != V4DFmode
47873 && d->vmode != V8SImode)
47875 dremap.vmode = V8SImode;
47876 dremap.nelt = 8;
47877 for (i = 0; i < 4; ++i)
47879 dremap.perm[i] = i + nonzero_halves[0] * 4;
47880 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
47884 else if (d->one_operand_p)
47885 return false;
47886 else if (TARGET_AVX2
47887 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
47889 /* vpunpckl* */
47890 for (i = 0; i < nelt4; ++i)
47892 remap[i] = i * 2;
47893 remap[i + nelt] = i * 2 + 1;
47894 remap[i + nelt2] = i * 2 + nelt2;
47895 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
47896 dremap.perm[i * 2] = i;
47897 dremap.perm[i * 2 + 1] = i + nelt;
47898 dremap.perm[i * 2 + nelt2] = i + nelt2;
47899 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
47902 else if (TARGET_AVX2
47903 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
47905 /* vpunpckh* */
47906 for (i = 0; i < nelt4; ++i)
47908 remap[i + nelt4] = i * 2;
47909 remap[i + nelt + nelt4] = i * 2 + 1;
47910 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
47911 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
47912 dremap.perm[i * 2] = i + nelt4;
47913 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
47914 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
47915 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
47918 else
47919 return false;
47922 /* Use the remapping array set up above to move the elements from their
47923 swizzled locations into their final destinations. */
47924 dfinal = *d;
47925 for (i = 0; i < nelt; ++i)
47927 unsigned e = remap[d->perm[i]];
47928 gcc_assert (e < nelt);
47929 /* If same_halves is true, both halves of the remapped vector are the
47930 same. Avoid cross-lane accesses if possible. */
47931 if (same_halves && i >= nelt2)
47933 gcc_assert (e < nelt2);
47934 dfinal.perm[i] = e + nelt2;
47936 else
47937 dfinal.perm[i] = e;
47939 if (!d->testing_p)
47941 dremap.target = gen_reg_rtx (dremap.vmode);
47942 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47944 dfinal.op1 = dfinal.op0;
47945 dfinal.one_operand_p = true;
47947 /* Test if the final remap can be done with a single insn. For V4SFmode or
47948 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
47949 start_sequence ();
47950 ok = expand_vec_perm_1 (&dfinal);
47951 seq = get_insns ();
47952 end_sequence ();
47954 if (!ok)
47955 return false;
47957 if (d->testing_p)
47958 return true;
47960 if (dremap.vmode != dfinal.vmode)
47962 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
47963 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
47966 ok = expand_vec_perm_1 (&dremap);
47967 gcc_assert (ok);
47969 emit_insn (seq);
47970 return true;
47973 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47974 a single vector cross-lane permutation into vpermq followed
47975 by any of the single insn permutations. */
47977 static bool
47978 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
47980 struct expand_vec_perm_d dremap, dfinal;
47981 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
47982 unsigned contents[2];
47983 bool ok;
47985 if (!(TARGET_AVX2
47986 && (d->vmode == V32QImode || d->vmode == V16HImode)
47987 && d->one_operand_p))
47988 return false;
47990 contents[0] = 0;
47991 contents[1] = 0;
47992 for (i = 0; i < nelt2; ++i)
47994 contents[0] |= 1u << (d->perm[i] / nelt4);
47995 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
47998 for (i = 0; i < 2; ++i)
48000 unsigned int cnt = 0;
48001 for (j = 0; j < 4; ++j)
48002 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
48003 return false;
48006 if (d->testing_p)
48007 return true;
48009 dremap = *d;
48010 dremap.vmode = V4DImode;
48011 dremap.nelt = 4;
48012 dremap.target = gen_reg_rtx (V4DImode);
48013 dremap.op0 = gen_lowpart (V4DImode, d->op0);
48014 dremap.op1 = dremap.op0;
48015 dremap.one_operand_p = true;
48016 for (i = 0; i < 2; ++i)
48018 unsigned int cnt = 0;
48019 for (j = 0; j < 4; ++j)
48020 if ((contents[i] & (1u << j)) != 0)
48021 dremap.perm[2 * i + cnt++] = j;
48022 for (; cnt < 2; ++cnt)
48023 dremap.perm[2 * i + cnt] = 0;
48026 dfinal = *d;
48027 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48028 dfinal.op1 = dfinal.op0;
48029 dfinal.one_operand_p = true;
48030 for (i = 0, j = 0; i < nelt; ++i)
48032 if (i == nelt2)
48033 j = 2;
48034 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
48035 if ((d->perm[i] / nelt4) == dremap.perm[j])
48037 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
48038 dfinal.perm[i] |= nelt4;
48039 else
48040 gcc_unreachable ();
48043 ok = expand_vec_perm_1 (&dremap);
48044 gcc_assert (ok);
48046 ok = expand_vec_perm_1 (&dfinal);
48047 gcc_assert (ok);
48049 return true;
48052 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
48053 a vector permutation using two instructions: vperm2f128 or
48054 vperm2i128 followed by any single in-lane permutation. */
48056 static bool
48057 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
48059 struct expand_vec_perm_d dfirst, dsecond;
48060 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
48061 bool ok;
48063 if (!TARGET_AVX
48064 || GET_MODE_SIZE (d->vmode) != 32
48065 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
48066 return false;
48068 dsecond = *d;
48069 dsecond.one_operand_p = false;
48070 dsecond.testing_p = true;
48072 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
48073 immediate. For perm < 16 the second permutation uses
48074 d->op0 as first operand, for perm >= 16 it uses d->op1
48075 as first operand. The second operand is the result of
48076 vperm2[fi]128. */
48077 for (perm = 0; perm < 32; perm++)
48079 /* Ignore permutations which do not move anything cross-lane. */
48080 if (perm < 16)
48082 /* The second shuffle for e.g. V4DFmode has
48083 0123 and ABCD operands.
48084 Ignore AB23, as 23 is already in the second lane
48085 of the first operand. */
48086 if ((perm & 0xc) == (1 << 2)) continue;
48087 /* And 01CD, as 01 is in the first lane of the first
48088 operand. */
48089 if ((perm & 3) == 0) continue;
48090 /* And 4567, as then the vperm2[fi]128 doesn't change
48091 anything on the original 4567 second operand. */
48092 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48094 else
48096 /* The second shuffle for e.g. V4DFmode has
48097 4567 and ABCD operands.
48098 Ignore AB67, as 67 is already in the second lane
48099 of the first operand. */
48100 if ((perm & 0xc) == (3 << 2)) continue;
48101 /* And 45CD, as 45 is in the first lane of the first
48102 operand. */
48103 if ((perm & 3) == 2) continue;
48104 /* And 0123, as then the vperm2[fi]128 doesn't change
48105 anything on the original 0123 first operand. */
48106 if ((perm & 0xf) == (1 << 2)) continue;
48109 for (i = 0; i < nelt; i++)
48111 j = d->perm[i] / nelt2;
48112 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48113 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48114 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48115 dsecond.perm[i] = d->perm[i] & (nelt - 1);
48116 else
48117 break;
48120 if (i == nelt)
48122 start_sequence ();
48123 ok = expand_vec_perm_1 (&dsecond);
48124 end_sequence ();
48126 else
48127 ok = false;
48129 if (ok)
48131 if (d->testing_p)
48132 return true;
48134 /* Found a usable second shuffle. dfirst will be
48135 vperm2f128 on d->op0 and d->op1. */
48136 dsecond.testing_p = false;
48137 dfirst = *d;
48138 dfirst.target = gen_reg_rtx (d->vmode);
48139 for (i = 0; i < nelt; i++)
48140 dfirst.perm[i] = (i & (nelt2 - 1))
48141 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48143 canonicalize_perm (&dfirst);
48144 ok = expand_vec_perm_1 (&dfirst);
48145 gcc_assert (ok);
48147 /* And dsecond is some single insn shuffle, taking
48148 d->op0 and result of vperm2f128 (if perm < 16) or
48149 d->op1 and result of vperm2f128 (otherwise). */
48150 if (perm >= 16)
48151 dsecond.op0 = dsecond.op1;
48152 dsecond.op1 = dfirst.target;
48154 ok = expand_vec_perm_1 (&dsecond);
48155 gcc_assert (ok);
48157 return true;
48160 /* For one operand, the only useful vperm2f128 permutation is 0x01,
48161 i.e. swapping the lanes. */
48162 if (d->one_operand_p)
48163 return false;
48166 return false;
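/* A worked example of the immediate folding above (illustrative only):
   the vperm2[fi]128 immediate holds the source-lane selector for the low
   destination lane in bits [1:0] and for the high destination lane in bits
   [5:4], so ((perm << 2) | perm) & 0x33 spreads the 2+2 selector bits of
   the loop counter into place; e.g. perm = 6 (low lane from lane 2, high
   lane from lane 1) becomes the immediate 0x12.  */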
48169 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48170 a two vector permutation using 2 intra-lane interleave insns
48171 and cross-lane shuffle for 32-byte vectors. */
48173 static bool
48174 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48176 unsigned i, nelt;
48177 rtx (*gen) (rtx, rtx, rtx);
48179 if (d->one_operand_p)
48180 return false;
48181 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48183 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
48185 else
48186 return false;
48188 nelt = d->nelt;
48189 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48190 return false;
48191 for (i = 0; i < nelt; i += 2)
48192 if (d->perm[i] != d->perm[0] + i / 2
48193 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
48194 return false;
48196 if (d->testing_p)
48197 return true;
48199 switch (d->vmode)
48201 case V32QImode:
48202 if (d->perm[0])
48203 gen = gen_vec_interleave_highv32qi;
48204 else
48205 gen = gen_vec_interleave_lowv32qi;
48206 break;
48207 case V16HImode:
48208 if (d->perm[0])
48209 gen = gen_vec_interleave_highv16hi;
48210 else
48211 gen = gen_vec_interleave_lowv16hi;
48212 break;
48213 case V8SImode:
48214 if (d->perm[0])
48215 gen = gen_vec_interleave_highv8si;
48216 else
48217 gen = gen_vec_interleave_lowv8si;
48218 break;
48219 case V4DImode:
48220 if (d->perm[0])
48221 gen = gen_vec_interleave_highv4di;
48222 else
48223 gen = gen_vec_interleave_lowv4di;
48224 break;
48225 case V8SFmode:
48226 if (d->perm[0])
48227 gen = gen_vec_interleave_highv8sf;
48228 else
48229 gen = gen_vec_interleave_lowv8sf;
48230 break;
48231 case V4DFmode:
48232 if (d->perm[0])
48233 gen = gen_vec_interleave_highv4df;
48234 else
48235 gen = gen_vec_interleave_lowv4df;
48236 break;
48237 default:
48238 gcc_unreachable ();
48241 emit_insn (gen (d->target, d->op0, d->op1));
48242 return true;
48245 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
48246 a single vector permutation using a single intra-lane vector
48247 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48248 the non-swapped and swapped vectors together. */
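/* E.g. the one-operand V4DFmode selector { 3 1 0 2 } becomes an in-lane
   shuffle to { 0 1 3 2 }, a vperm2f128 swap of that result's lanes to
   { 3 2 0 1 }, and a vblendpd with mask 0b0101 taking the swapped copy
   for elements 0 and 2.  */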
48250 static bool
48251 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48253 struct expand_vec_perm_d dfirst, dsecond;
48254 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48255 rtx_insn *seq;
48256 bool ok;
48257 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48259 if (!TARGET_AVX
48260 || TARGET_AVX2
48261 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48262 || !d->one_operand_p)
48263 return false;
48265 dfirst = *d;
48266 for (i = 0; i < nelt; i++)
48267 dfirst.perm[i] = 0xff;
48268 for (i = 0, msk = 0; i < nelt; i++)
48270 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48271 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48272 return false;
48273 dfirst.perm[j] = d->perm[i];
48274 if (j != i)
48275 msk |= (1 << i);
48277 for (i = 0; i < nelt; i++)
48278 if (dfirst.perm[i] == 0xff)
48279 dfirst.perm[i] = i;
48281 if (!d->testing_p)
48282 dfirst.target = gen_reg_rtx (dfirst.vmode);
48284 start_sequence ();
48285 ok = expand_vec_perm_1 (&dfirst);
48286 seq = get_insns ();
48287 end_sequence ();
48289 if (!ok)
48290 return false;
48292 if (d->testing_p)
48293 return true;
48295 emit_insn (seq);
48297 dsecond = *d;
48298 dsecond.op0 = dfirst.target;
48299 dsecond.op1 = dfirst.target;
48300 dsecond.one_operand_p = true;
48301 dsecond.target = gen_reg_rtx (dsecond.vmode);
48302 for (i = 0; i < nelt; i++)
48303 dsecond.perm[i] = i ^ nelt2;
48305 ok = expand_vec_perm_1 (&dsecond);
48306 gcc_assert (ok);
48308 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48309 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48310 return true;
48313 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
48314 permutation using two vperm2f128, followed by a vshufpd insn blending
48315 the two vectors together. */
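/* E.g. for the selector { 3 6 1 4 } the first vperm2f128 swaps the lanes
   of op0 (giving { 2 3 0 1 }), the second swaps the lanes of op1 (giving
   { 6 7 4 5 }), and the vshufpd then takes the odd element of each lane
   of the first result and the even element of each lane of the second,
   yielding { 3 6 1 4 }.  */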
48317 static bool
48318 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48320 struct expand_vec_perm_d dfirst, dsecond, dthird;
48321 bool ok;
48323 if (!TARGET_AVX || (d->vmode != V4DFmode))
48324 return false;
48326 if (d->testing_p)
48327 return true;
48329 dfirst = *d;
48330 dsecond = *d;
48331 dthird = *d;
48333 dfirst.perm[0] = (d->perm[0] & ~1);
48334 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48335 dfirst.perm[2] = (d->perm[2] & ~1);
48336 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48337 dsecond.perm[0] = (d->perm[1] & ~1);
48338 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48339 dsecond.perm[2] = (d->perm[3] & ~1);
48340 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48341 dthird.perm[0] = (d->perm[0] % 2);
48342 dthird.perm[1] = (d->perm[1] % 2) + 4;
48343 dthird.perm[2] = (d->perm[2] % 2) + 2;
48344 dthird.perm[3] = (d->perm[3] % 2) + 6;
48346 dfirst.target = gen_reg_rtx (dfirst.vmode);
48347 dsecond.target = gen_reg_rtx (dsecond.vmode);
48348 dthird.op0 = dfirst.target;
48349 dthird.op1 = dsecond.target;
48350 dthird.one_operand_p = false;
48352 canonicalize_perm (&dfirst);
48353 canonicalize_perm (&dsecond);
48355 ok = expand_vec_perm_1 (&dfirst)
48356 && expand_vec_perm_1 (&dsecond)
48357 && expand_vec_perm_1 (&dthird);
48359 gcc_assert (ok);
48361 return true;
48364 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48365 permutation with two pshufb insns and an ior. We should have already
48366 failed all two instruction sequences. */
48368 static bool
48369 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48371 rtx rperm[2][16], vperm, l, h, op, m128;
48372 unsigned int i, nelt, eltsz;
48374 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48375 return false;
48376 gcc_assert (!d->one_operand_p);
48378 if (d->testing_p)
48379 return true;
48381 nelt = d->nelt;
48382 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48384 /* Generate two permutation masks. If the required element is within
48385 the given vector it is shuffled into the proper lane. If the required
48386 element is in the other vector, force a zero into the lane by setting
48387 bit 7 in the permutation mask. */
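/* E.g. if d->perm[0] is 17 for V16QImode, byte 0 of the first mask is
   -128, so the pshufb on d->op0 zeroes that byte, while byte 0 of the
   second mask is 1, so the pshufb on d->op1 delivers element
   17 - 16 = 1 there; the final ior merges the two results.  */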
48388 m128 = GEN_INT (-128);
48389 for (i = 0; i < nelt; ++i)
48391 unsigned j, e = d->perm[i];
48392 unsigned which = (e >= nelt);
48393 if (e >= nelt)
48394 e -= nelt;
48396 for (j = 0; j < eltsz; ++j)
48398 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48399 rperm[1-which][i*eltsz + j] = m128;
48403 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48404 vperm = force_reg (V16QImode, vperm);
48406 l = gen_reg_rtx (V16QImode);
48407 op = gen_lowpart (V16QImode, d->op0);
48408 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48410 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48411 vperm = force_reg (V16QImode, vperm);
48413 h = gen_reg_rtx (V16QImode);
48414 op = gen_lowpart (V16QImode, d->op1);
48415 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48417 op = d->target;
48418 if (d->vmode != V16QImode)
48419 op = gen_reg_rtx (V16QImode);
48420 emit_insn (gen_iorv16qi3 (op, l, h));
48421 if (op != d->target)
48422 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48424 return true;
48427 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
48428 with two vpshufb insns, vpermq and vpor. We should have already failed
48429 all two or three instruction sequences. */
48431 static bool
48432 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48434 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48435 unsigned int i, nelt, eltsz;
48437 if (!TARGET_AVX2
48438 || !d->one_operand_p
48439 || (d->vmode != V32QImode && d->vmode != V16HImode))
48440 return false;
48442 if (d->testing_p)
48443 return true;
48445 nelt = d->nelt;
48446 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48448 /* Generate two permutation masks. If the required element is within
48449 the same lane, it is shuffled in.  If the required element is from the
48450 other lane, force a zero by setting bit 7 in the permutation mask.
48451 The other mask has non-negative elements where an element is
48452 requested from the other lane, but that element is also moved to the
48453 other lane, so that the result of vpshufb can have its two V2TImode
48454 halves swapped. */
48455 m128 = GEN_INT (-128);
48456 for (i = 0; i < nelt; ++i)
48458 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48459 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48461 for (j = 0; j < eltsz; ++j)
48463 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48464 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48468 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48469 vperm = force_reg (V32QImode, vperm);
48471 h = gen_reg_rtx (V32QImode);
48472 op = gen_lowpart (V32QImode, d->op0);
48473 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48475 /* Swap the 128-bit lanes of h into hp. */
48476 hp = gen_reg_rtx (V4DImode);
48477 op = gen_lowpart (V4DImode, h);
48478 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48479 const1_rtx));
48481 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48482 vperm = force_reg (V32QImode, vperm);
48484 l = gen_reg_rtx (V32QImode);
48485 op = gen_lowpart (V32QImode, d->op0);
48486 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48488 op = d->target;
48489 if (d->vmode != V32QImode)
48490 op = gen_reg_rtx (V32QImode);
48491 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48492 if (op != d->target)
48493 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48495 return true;
48498 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48499 and extract-odd permutations of two V32QImode or V16HImode operands
48500 with two vpshufb insns, vpor and vpermq. We should have already
48501 failed all two or three instruction sequences. */
48503 static bool
48504 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48506 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48507 unsigned int i, nelt, eltsz;
48509 if (!TARGET_AVX2
48510 || d->one_operand_p
48511 || (d->vmode != V32QImode && d->vmode != V16HImode))
48512 return false;
48514 for (i = 0; i < d->nelt; ++i)
48515 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48516 return false;
48518 if (d->testing_p)
48519 return true;
48521 nelt = d->nelt;
48522 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48524 /* Generate two permutation masks. In the first permutation mask
48525 the first quarter will contain indexes for the first half
48526 of the op0, the second quarter will contain bit 7 set, third quarter
48527 will contain indexes for the second half of the op0 and the
48528 last quarter bit 7 set. In the second permutation mask
48529 the first quarter will contain bit 7 set, the second quarter
48530 indexes for the first half of the op1, the third quarter bit 7 set
48531 and last quarter indexes for the second half of the op1.
48532 I.e. the first mask e.g. for V32QImode extract even will be:
48533 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48534 (all values masked with 0xf except for -128) and second mask
48535 for extract even will be
48536 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48537 m128 = GEN_INT (-128);
48538 for (i = 0; i < nelt; ++i)
48540 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48541 unsigned which = d->perm[i] >= nelt;
48542 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48544 for (j = 0; j < eltsz; ++j)
48546 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48547 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48551 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48552 vperm = force_reg (V32QImode, vperm);
48554 l = gen_reg_rtx (V32QImode);
48555 op = gen_lowpart (V32QImode, d->op0);
48556 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48558 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48559 vperm = force_reg (V32QImode, vperm);
48561 h = gen_reg_rtx (V32QImode);
48562 op = gen_lowpart (V32QImode, d->op1);
48563 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48565 ior = gen_reg_rtx (V32QImode);
48566 emit_insn (gen_iorv32qi3 (ior, l, h));
48568 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48569 op = gen_reg_rtx (V4DImode);
48570 ior = gen_lowpart (V4DImode, ior);
48571 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48572 const1_rtx, GEN_INT (3)));
48573 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48575 return true;
48578 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48579 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48580 with two "and" and "pack" or two "shift" and "pack" insns. We should
48581 have already failed all two instruction sequences. */
48583 static bool
48584 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48586 rtx op, dop0, dop1, t, rperm[16];
48587 unsigned i, odd, c, s, nelt = d->nelt;
48588 bool end_perm = false;
48589 machine_mode half_mode;
48590 rtx (*gen_and) (rtx, rtx, rtx);
48591 rtx (*gen_pack) (rtx, rtx, rtx);
48592 rtx (*gen_shift) (rtx, rtx, rtx);
48594 if (d->one_operand_p)
48595 return false;
48597 switch (d->vmode)
48599 case V8HImode:
48600 /* Required for "pack". */
48601 if (!TARGET_SSE4_1)
48602 return false;
48603 c = 0xffff;
48604 s = 16;
48605 half_mode = V4SImode;
48606 gen_and = gen_andv4si3;
48607 gen_pack = gen_sse4_1_packusdw;
48608 gen_shift = gen_lshrv4si3;
48609 break;
48610 case V16QImode:
48611 /* No check as all instructions are SSE2. */
48612 c = 0xff;
48613 s = 8;
48614 half_mode = V8HImode;
48615 gen_and = gen_andv8hi3;
48616 gen_pack = gen_sse2_packuswb;
48617 gen_shift = gen_lshrv8hi3;
48618 break;
48619 case V16HImode:
48620 if (!TARGET_AVX2)
48621 return false;
48622 c = 0xffff;
48623 s = 16;
48624 half_mode = V8SImode;
48625 gen_and = gen_andv8si3;
48626 gen_pack = gen_avx2_packusdw;
48627 gen_shift = gen_lshrv8si3;
48628 end_perm = true;
48629 break;
48630 case V32QImode:
48631 if (!TARGET_AVX2)
48632 return false;
48633 c = 0xff;
48634 s = 8;
48635 half_mode = V16HImode;
48636 gen_and = gen_andv16hi3;
48637 gen_pack = gen_avx2_packuswb;
48638 gen_shift = gen_lshrv16hi3;
48639 end_perm = true;
48640 break;
48641 default:
48642 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
48643 general shuffles. */
48644 return false;
48647 /* Check that permutation is even or odd. */
48648 odd = d->perm[0];
48649 if (odd > 1)
48650 return false;
48652 for (i = 1; i < nelt; ++i)
48653 if (d->perm[i] != 2 * i + odd)
48654 return false;
48656 if (d->testing_p)
48657 return true;
48659 dop0 = gen_reg_rtx (half_mode);
48660 dop1 = gen_reg_rtx (half_mode);
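/* For the even case each wide element is masked down to its low half and
   the unsigned-saturating pack glues the two operands back together,
   e.g. for V16QImode extract-even this yields
   { a0 a2 ... a14 b0 b2 ... b14 }.  For the odd case the logical right
   shift by S bits moves the odd sub-elements into the low halves first.  */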
48661 if (odd == 0)
48663 for (i = 0; i < nelt / 2; i++)
48664 rperm[i] = GEN_INT (c);
48665 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
48666 t = force_reg (half_mode, t);
48667 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
48668 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
48670 else
48672 emit_insn (gen_shift (dop0,
48673 gen_lowpart (half_mode, d->op0),
48674 GEN_INT (s)));
48675 emit_insn (gen_shift (dop1,
48676 gen_lowpart (half_mode, d->op1),
48677 GEN_INT (s)));
48679 /* In the AVX2 256-bit case we need to permute the pack result. */
48680 if (TARGET_AVX2 && end_perm)
48682 op = gen_reg_rtx (d->vmode);
48683 t = gen_reg_rtx (V4DImode);
48684 emit_insn (gen_pack (op, dop0, dop1));
48685 emit_insn (gen_avx2_permv4di_1 (t,
48686 gen_lowpart (V4DImode, op),
48687 const0_rtx,
48688 const2_rtx,
48689 const1_rtx,
48690 GEN_INT (3)));
48691 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
48693 else
48694 emit_insn (gen_pack (d->target, dop0, dop1));
48696 return true;
48699 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
48700 and extract-odd permutations. */
48702 static bool
48703 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48705 rtx t1, t2, t3, t4, t5;
48707 switch (d->vmode)
48709 case V4DFmode:
48710 if (d->testing_p)
48711 break;
48712 t1 = gen_reg_rtx (V4DFmode);
48713 t2 = gen_reg_rtx (V4DFmode);
48715 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48716 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48717 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48719 /* Now an unpck[lh]pd will produce the result required. */
48720 if (odd)
48721 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48722 else
48723 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
48724 emit_insn (t3);
48725 break;
48727 case V8SFmode:
48729 int mask = odd ? 0xdd : 0x88;
48731 if (d->testing_p)
48732 break;
48733 t1 = gen_reg_rtx (V8SFmode);
48734 t2 = gen_reg_rtx (V8SFmode);
48735 t3 = gen_reg_rtx (V8SFmode);
48737 /* Shuffle within the 128-bit lanes to produce:
48738 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
48739 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48740 GEN_INT (mask)));
48742 /* Shuffle the lanes around to produce:
48743 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
48744 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48745 GEN_INT (0x3)));
48747 /* Shuffle within the 128-bit lanes to produce:
48748 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
48749 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48751 /* Shuffle within the 128-bit lanes to produce:
48752 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
48753 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48755 /* Shuffle the lanes around to produce:
48756 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
48757 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
48758 GEN_INT (0x20)));
48760 break;
48762 case V2DFmode:
48763 case V4SFmode:
48764 case V2DImode:
48765 case V4SImode:
48766 /* These are always directly implementable by expand_vec_perm_1. */
48767 gcc_unreachable ();
48769 case V8HImode:
48770 if (TARGET_SSE4_1)
48771 return expand_vec_perm_even_odd_pack (d);
48772 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48773 return expand_vec_perm_pshufb2 (d);
48774 else
48776 if (d->testing_p)
48777 break;
48778 /* We need 2*log2(N)-1 operations to achieve odd/even
48779 with interleave. */
48780 t1 = gen_reg_rtx (V8HImode);
48781 t2 = gen_reg_rtx (V8HImode);
48782 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48783 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48784 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48785 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48786 if (odd)
48787 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48788 else
48789 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
48790 emit_insn (t3);
48792 break;
48794 case V16QImode:
48795 return expand_vec_perm_even_odd_pack (d);
48797 case V16HImode:
48798 case V32QImode:
48799 return expand_vec_perm_even_odd_pack (d);
48801 case V4DImode:
48802 if (!TARGET_AVX2)
48804 struct expand_vec_perm_d d_copy = *d;
48805 d_copy.vmode = V4DFmode;
48806 if (d->testing_p)
48807 d_copy.target = gen_lowpart (V4DFmode, d->target);
48808 else
48809 d_copy.target = gen_reg_rtx (V4DFmode);
48810 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48811 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48812 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48814 if (!d->testing_p)
48815 emit_move_insn (d->target,
48816 gen_lowpart (V4DImode, d_copy.target));
48817 return true;
48819 return false;
48822 if (d->testing_p)
48823 break;
48825 t1 = gen_reg_rtx (V4DImode);
48826 t2 = gen_reg_rtx (V4DImode);
48828 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48829 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48830 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48832 /* Now an vpunpck[lh]qdq will produce the result required. */
48833 if (odd)
48834 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48835 else
48836 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
48837 emit_insn (t3);
48838 break;
48840 case V8SImode:
48841 if (!TARGET_AVX2)
48843 struct expand_vec_perm_d d_copy = *d;
48844 d_copy.vmode = V8SFmode;
48845 if (d->testing_p)
48846 d_copy.target = gen_lowpart (V8SFmode, d->target);
48847 else
48848 d_copy.target = gen_reg_rtx (V8SFmode);
48849 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48850 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48851 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48853 if (!d->testing_p)
48854 emit_move_insn (d->target,
48855 gen_lowpart (V8SImode, d_copy.target));
48856 return true;
48858 return false;
48861 if (d->testing_p)
48862 break;
48864 t1 = gen_reg_rtx (V8SImode);
48865 t2 = gen_reg_rtx (V8SImode);
48866 t3 = gen_reg_rtx (V4DImode);
48867 t4 = gen_reg_rtx (V4DImode);
48868 t5 = gen_reg_rtx (V4DImode);
48870 /* Shuffle the lanes around into
48871 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
48872 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
48873 gen_lowpart (V4DImode, d->op1),
48874 GEN_INT (0x20)));
48875 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
48876 gen_lowpart (V4DImode, d->op1),
48877 GEN_INT (0x31)));
48879 /* Swap the 2nd and 3rd position in each lane into
48880 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
48881 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
48882 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48883 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
48884 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48886 /* Now an vpunpck[lh]qdq will produce
48887 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
48888 if (odd)
48889 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
48890 gen_lowpart (V4DImode, t2));
48891 else
48892 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
48893 gen_lowpart (V4DImode, t2));
48894 emit_insn (t3);
48895 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
48896 break;
48898 default:
48899 gcc_unreachable ();
48902 return true;
48905 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
48906 extract-even and extract-odd permutations. */
48908 static bool
48909 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
48911 unsigned i, odd, nelt = d->nelt;
48913 odd = d->perm[0];
48914 if (odd != 0 && odd != 1)
48915 return false;
48917 for (i = 1; i < nelt; ++i)
48918 if (d->perm[i] != 2 * i + odd)
48919 return false;
48921 return expand_vec_perm_even_odd_1 (d, odd);
48924 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
48925 permutations. We assume that expand_vec_perm_1 has already failed. */
48927 static bool
48928 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
48930 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
48931 machine_mode vmode = d->vmode;
48932 unsigned char perm2[4];
48933 rtx op0 = d->op0, dest;
48934 bool ok;
48936 switch (vmode)
48938 case V4DFmode:
48939 case V8SFmode:
48940 /* These are special-cased in sse.md so that we can optionally
48941 use the vbroadcast instruction. They expand to two insns
48942 if the input happens to be in a register. */
48943 gcc_unreachable ();
48945 case V2DFmode:
48946 case V2DImode:
48947 case V4SFmode:
48948 case V4SImode:
48949 /* These are always implementable using standard shuffle patterns. */
48950 gcc_unreachable ();
48952 case V8HImode:
48953 case V16QImode:
48954 /* These can be implemented via interleave. We save one insn by
48955 stopping once we have promoted to V4SImode and then use pshufd. */
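/* E.g. to broadcast element 5 of a V8HImode vector, one punpckhwd of the
   operand with itself gives { 4 4 5 5 6 6 7 7 }; viewed as V4SImode, a
   pshufd broadcast of element 1 then replicates the { 5 5 } pair across
   all four dwords.  */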
48956 if (d->testing_p)
48957 return true;
48960 rtx dest;
48961 rtx (*gen) (rtx, rtx, rtx)
48962 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
48963 : gen_vec_interleave_lowv8hi;
48965 if (elt >= nelt2)
48967 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
48968 : gen_vec_interleave_highv8hi;
48969 elt -= nelt2;
48971 nelt2 /= 2;
48973 dest = gen_reg_rtx (vmode);
48974 emit_insn (gen (dest, op0, op0));
48975 vmode = get_mode_wider_vector (vmode);
48976 op0 = gen_lowpart (vmode, dest);
48978 while (vmode != V4SImode);
48980 memset (perm2, elt, 4);
48981 dest = gen_reg_rtx (V4SImode);
48982 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
48983 gcc_assert (ok);
48984 if (!d->testing_p)
48985 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
48986 return true;
48988 case V64QImode:
48989 case V32QImode:
48990 case V16HImode:
48991 case V8SImode:
48992 case V4DImode:
48993 /* For AVX2 broadcasts of the first element vpbroadcast* or
48994 vpermq should be used by expand_vec_perm_1. */
48995 gcc_assert (!TARGET_AVX2 || d->perm[0]);
48996 return false;
48998 default:
48999 gcc_unreachable ();
49003 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
49004 broadcast permutations. */
49006 static bool
49007 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
49009 unsigned i, elt, nelt = d->nelt;
49011 if (!d->one_operand_p)
49012 return false;
49014 elt = d->perm[0];
49015 for (i = 1; i < nelt; ++i)
49016 if (d->perm[i] != elt)
49017 return false;
49019 return expand_vec_perm_broadcast_1 (d);
49022 /* Implement arbitrary permutations of two V64QImode operands
49023 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
49024 static bool
49025 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
49027 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
49028 return false;
49030 if (d->testing_p)
49031 return true;
49033 struct expand_vec_perm_d ds[2];
49034 rtx rperm[128], vperm, target0, target1;
49035 unsigned int i, nelt;
49036 machine_mode vmode;
49038 nelt = d->nelt;
49039 vmode = V64QImode;
49041 for (i = 0; i < 2; i++)
49043 ds[i] = *d;
49044 ds[i].vmode = V32HImode;
49045 ds[i].nelt = 32;
49046 ds[i].target = gen_reg_rtx (V32HImode);
49047 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
49048 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
49051 /* Prepare the permutations such that the first one puts the
49052 even destination bytes into their required positions or one
49053 byte higher (ds[0]), and the second one puts the odd
49054 destination bytes into their required positions or one byte
49055 lower (ds[1]). */
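/* E.g. if d->perm[0] is 37, byte 37 of the inputs lives in 16-bit word
   18; ds[0] moves that word into word 0, the first vpshufb mask byte is
   (0 & 14) + (37 & 1) = 1, so the word's high byte lands in byte 0 of
   target0, and the final vpor merges in the odd bytes produced the same
   way via ds[1].  */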
49057 for (i = 0; i < nelt; i++)
49059 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
49060 if (i & 1)
49062 rperm[i] = constm1_rtx;
49063 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49065 else
49067 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49068 rperm[i + 64] = constm1_rtx;
49072 bool ok = expand_vec_perm_1 (&ds[0]);
49073 gcc_assert (ok);
49074 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49076 ok = expand_vec_perm_1 (&ds[1]);
49077 gcc_assert (ok);
49078 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
49080 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49081 vperm = force_reg (vmode, vperm);
49082 target0 = gen_reg_rtx (V64QImode);
49083 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49085 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49086 vperm = force_reg (vmode, vperm);
49087 target1 = gen_reg_rtx (V64QImode);
49088 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
49090 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
49091 return true;
49094 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
49095 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
49096 all the shorter instruction sequences. */
49098 static bool
49099 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49101 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49102 unsigned int i, nelt, eltsz;
49103 bool used[4];
49105 if (!TARGET_AVX2
49106 || d->one_operand_p
49107 || (d->vmode != V32QImode && d->vmode != V16HImode))
49108 return false;
49110 if (d->testing_p)
49111 return true;
49113 nelt = d->nelt;
49114 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
49116 /* Generate 4 permutation masks. If the required element is within
49117 the same lane, it is shuffled in.  If the required element is from the
49118 other lane, force a zero by setting bit 7 in the permutation mask.
49119 The other masks have non-negative elements where an element is
49120 requested from the other lane, but that element is also moved to the
49121 other lane, so that the result of vpshufb can have its two V2TImode
49122 halves swapped. */
49123 m128 = GEN_INT (-128);
49124 for (i = 0; i < 32; ++i)
49126 rperm[0][i] = m128;
49127 rperm[1][i] = m128;
49128 rperm[2][i] = m128;
49129 rperm[3][i] = m128;
49131 used[0] = false;
49132 used[1] = false;
49133 used[2] = false;
49134 used[3] = false;
49135 for (i = 0; i < nelt; ++i)
49137 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49138 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49139 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49141 for (j = 0; j < eltsz; ++j)
49142 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49143 used[which] = true;
49146 for (i = 0; i < 2; ++i)
49148 if (!used[2 * i + 1])
49150 h[i] = NULL_RTX;
49151 continue;
49153 vperm = gen_rtx_CONST_VECTOR (V32QImode,
49154 gen_rtvec_v (32, rperm[2 * i + 1]));
49155 vperm = force_reg (V32QImode, vperm);
49156 h[i] = gen_reg_rtx (V32QImode);
49157 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49158 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
49161 /* Swap the 128-bit lanes of h[X]. */
49162 for (i = 0; i < 2; ++i)
49164 if (h[i] == NULL_RTX)
49165 continue;
49166 op = gen_reg_rtx (V4DImode);
49167 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49168 const2_rtx, GEN_INT (3), const0_rtx,
49169 const1_rtx));
49170 h[i] = gen_lowpart (V32QImode, op);
49173 for (i = 0; i < 2; ++i)
49175 if (!used[2 * i])
49177 l[i] = NULL_RTX;
49178 continue;
49180 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49181 vperm = force_reg (V32QImode, vperm);
49182 l[i] = gen_reg_rtx (V32QImode);
49183 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49184 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
49187 for (i = 0; i < 2; ++i)
49189 if (h[i] && l[i])
49191 op = gen_reg_rtx (V32QImode);
49192 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49193 l[i] = op;
49195 else if (h[i])
49196 l[i] = h[i];
49199 gcc_assert (l[0] && l[1]);
49200 op = d->target;
49201 if (d->vmode != V32QImode)
49202 op = gen_reg_rtx (V32QImode);
49203 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49204 if (op != d->target)
49205 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49206 return true;
49209 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49210 With all of the interface bits taken care of, perform the expansion
49211 in D and return true on success. */
49213 static bool
49214 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49216 /* Try a single instruction expansion. */
49217 if (expand_vec_perm_1 (d))
49218 return true;
49220 /* Try sequences of two instructions. */
49222 if (expand_vec_perm_pshuflw_pshufhw (d))
49223 return true;
49225 if (expand_vec_perm_palignr (d, false))
49226 return true;
49228 if (expand_vec_perm_interleave2 (d))
49229 return true;
49231 if (expand_vec_perm_broadcast (d))
49232 return true;
49234 if (expand_vec_perm_vpermq_perm_1 (d))
49235 return true;
49237 if (expand_vec_perm_vperm2f128 (d))
49238 return true;
49240 if (expand_vec_perm_pblendv (d))
49241 return true;
49243 /* Try sequences of three instructions. */
49245 if (expand_vec_perm_even_odd_pack (d))
49246 return true;
49248 if (expand_vec_perm_2vperm2f128_vshuf (d))
49249 return true;
49251 if (expand_vec_perm_pshufb2 (d))
49252 return true;
49254 if (expand_vec_perm_interleave3 (d))
49255 return true;
49257 if (expand_vec_perm_vperm2f128_vblend (d))
49258 return true;
49260 /* Try sequences of four instructions. */
49262 if (expand_vec_perm_vpshufb2_vpermq (d))
49263 return true;
49265 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49266 return true;
49268 if (expand_vec_perm_vpermi2_vpshub2 (d))
49269 return true;
49271 /* ??? Look for narrow permutations whose element orderings would
49272 allow the promotion to a wider mode. */
49274 /* ??? Look for sequences of interleave or a wider permute that place
49275 the data into the correct lanes for a half-vector shuffle like
49276 pshuf[lh]w or vpermilps. */
49278 /* ??? Look for sequences of interleave that produce the desired results.
49279 The combinatorics of punpck[lh] get pretty ugly... */
49281 if (expand_vec_perm_even_odd (d))
49282 return true;
49284 /* Even longer sequences. */
49285 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49286 return true;
49288 return false;
49291 /* If a permutation only uses one operand, make it clear. Returns true
49292 if the permutation references both operands. */
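/* E.g. for nelt == 4 a selector of { 4 5 6 7 } only reads the second
   operand; it is rewritten as { 0 1 2 3 } with op0 = op1 so that
   single-input shuffles can match it.  */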
49294 static bool
49295 canonicalize_perm (struct expand_vec_perm_d *d)
49297 int i, which, nelt = d->nelt;
49299 for (i = which = 0; i < nelt; ++i)
49300 which |= (d->perm[i] < nelt ? 1 : 2);
49302 d->one_operand_p = true;
49303 switch (which)
49305 default:
49306 gcc_unreachable();
49308 case 3:
49309 if (!rtx_equal_p (d->op0, d->op1))
49311 d->one_operand_p = false;
49312 break;
49314 /* The elements of PERM do not suggest that only the first operand
49315 is used, but both operands are identical. Allow easier matching
49316 of the permutation by folding the permutation into the single
49317 input vector. */
49318 /* FALLTHRU */
49320 case 2:
49321 for (i = 0; i < nelt; ++i)
49322 d->perm[i] &= nelt - 1;
49323 d->op0 = d->op1;
49324 break;
49326 case 1:
49327 d->op1 = d->op0;
49328 break;
49331 return (which == 3);
49334 bool
49335 ix86_expand_vec_perm_const (rtx operands[4])
49337 struct expand_vec_perm_d d;
49338 unsigned char perm[MAX_VECT_LEN];
49339 int i, nelt;
49340 bool two_args;
49341 rtx sel;
49343 d.target = operands[0];
49344 d.op0 = operands[1];
49345 d.op1 = operands[2];
49346 sel = operands[3];
49348 d.vmode = GET_MODE (d.target);
49349 gcc_assert (VECTOR_MODE_P (d.vmode));
49350 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49351 d.testing_p = false;
49353 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49354 gcc_assert (XVECLEN (sel, 0) == nelt);
49355 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
49357 for (i = 0; i < nelt; ++i)
49359 rtx e = XVECEXP (sel, 0, i);
49360 int ei = INTVAL (e) & (2 * nelt - 1);
49361 d.perm[i] = ei;
49362 perm[i] = ei;
49365 two_args = canonicalize_perm (&d);
49367 if (ix86_expand_vec_perm_const_1 (&d))
49368 return true;
49370 /* If the selector says both arguments are needed, but the operands are the
49371 same, the above tried to expand with one_operand_p and flattened selector.
49372 If that didn't work, retry without one_operand_p; we succeeded with that
49373 during testing. */
49374 if (two_args && d.one_operand_p)
49376 d.one_operand_p = false;
49377 memcpy (d.perm, perm, sizeof (perm));
49378 return ix86_expand_vec_perm_const_1 (&d);
49381 return false;
49384 /* Implement targetm.vectorize.vec_perm_const_ok. */
49386 static bool
49387 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49388 const unsigned char *sel)
49390 struct expand_vec_perm_d d;
49391 unsigned int i, nelt, which;
49392 bool ret;
49394 d.vmode = vmode;
49395 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49396 d.testing_p = true;
49398 /* Given sufficient ISA support we can just return true here
49399 for selected vector modes. */
49400 switch (d.vmode)
49402 case V16SFmode:
49403 case V16SImode:
49404 case V8DImode:
49405 case V8DFmode:
49406 if (TARGET_AVX512F)
49407 /* All implementable with a single vpermi2 insn. */
49408 return true;
49409 break;
49410 case V32HImode:
49411 if (TARGET_AVX512BW)
49412 /* All implementable with a single vpermi2 insn. */
49413 return true;
49414 break;
49415 case V64QImode:
49416 if (TARGET_AVX512BW)
49417 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
49418 return true;
49419 break;
49420 case V8SImode:
49421 case V8SFmode:
49422 case V4DFmode:
49423 case V4DImode:
49424 if (TARGET_AVX512VL)
49425 /* All implementable with a single vpermi2 insn. */
49426 return true;
49427 break;
49428 case V16HImode:
49429 if (TARGET_AVX2)
49430 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49431 return true;
49432 break;
49433 case V32QImode:
49434 if (TARGET_AVX2)
49435 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49436 return true;
49437 break;
49438 case V4SImode:
49439 case V4SFmode:
49440 case V8HImode:
49441 case V16QImode:
49442 /* All implementable with a single vpperm insn. */
49443 if (TARGET_XOP)
49444 return true;
49445 /* All implementable with 2 pshufb + 1 ior. */
49446 if (TARGET_SSSE3)
49447 return true;
49448 break;
49449 case V2DImode:
49450 case V2DFmode:
49451 /* All implementable with shufpd or unpck[lh]pd. */
49452 return true;
49453 default:
49454 return false;
49457 /* Extract the values from the vector CST into the permutation
49458 array in D. */
49459 memcpy (d.perm, sel, nelt);
49460 for (i = which = 0; i < nelt; ++i)
49462 unsigned char e = d.perm[i];
49463 gcc_assert (e < 2 * nelt);
49464 which |= (e < nelt ? 1 : 2);
49467 /* For all elements from second vector, fold the elements to first. */
49468 if (which == 2)
49469 for (i = 0; i < nelt; ++i)
49470 d.perm[i] -= nelt;
49472 /* Check whether the mask can be applied to the vector type. */
49473 d.one_operand_p = (which != 3);
49475 /* Implementable with shufps or pshufd. */
49476 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49477 return true;
49479 /* Otherwise we have to go through the motions and see if we can
49480 figure out how to generate the requested permutation. */
49481 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49482 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49483 if (!d.one_operand_p)
49484 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49486 start_sequence ();
49487 ret = ix86_expand_vec_perm_const_1 (&d);
49488 end_sequence ();
49490 return ret;
49493 void
49494 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49496 struct expand_vec_perm_d d;
49497 unsigned i, nelt;
49499 d.target = targ;
49500 d.op0 = op0;
49501 d.op1 = op1;
49502 d.vmode = GET_MODE (targ);
49503 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49504 d.one_operand_p = false;
49505 d.testing_p = false;
49507 for (i = 0; i < nelt; ++i)
49508 d.perm[i] = i * 2 + odd;
49510 /* We'll either be able to implement the permutation directly... */
49511 if (expand_vec_perm_1 (&d))
49512 return;
49514 /* ... or we use the special-case patterns. */
49515 expand_vec_perm_even_odd_1 (&d, odd);
49518 static void
49519 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49521 struct expand_vec_perm_d d;
49522 unsigned i, nelt, base;
49523 bool ok;
49525 d.target = targ;
49526 d.op0 = op0;
49527 d.op1 = op1;
49528 d.vmode = GET_MODE (targ);
49529 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49530 d.one_operand_p = false;
49531 d.testing_p = false;
49533 base = high_p ? nelt / 2 : 0;
49534 for (i = 0; i < nelt / 2; ++i)
49536 d.perm[i * 2] = i + base;
49537 d.perm[i * 2 + 1] = i + base + nelt;
49540 /* Note that for AVX this isn't one instruction. */
49541 ok = ix86_expand_vec_perm_const_1 (&d);
49542 gcc_assert (ok);
49546 /* Expand a vector operation CODE for a V*QImode in terms of the
49547 same operation on V*HImode. */
49549 void
49550 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49552 machine_mode qimode = GET_MODE (dest);
49553 machine_mode himode;
49554 rtx (*gen_il) (rtx, rtx, rtx);
49555 rtx (*gen_ih) (rtx, rtx, rtx);
49556 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49557 struct expand_vec_perm_d d;
49558 bool ok, full_interleave;
49559 bool uns_p = false;
49560 int i;
49562 switch (qimode)
49564 case V16QImode:
49565 himode = V8HImode;
49566 gen_il = gen_vec_interleave_lowv16qi;
49567 gen_ih = gen_vec_interleave_highv16qi;
49568 break;
49569 case V32QImode:
49570 himode = V16HImode;
49571 gen_il = gen_avx2_interleave_lowv32qi;
49572 gen_ih = gen_avx2_interleave_highv32qi;
49573 break;
49574 case V64QImode:
49575 himode = V32HImode;
49576 gen_il = gen_avx512bw_interleave_lowv64qi;
49577 gen_ih = gen_avx512bw_interleave_highv64qi;
49578 break;
49579 default:
49580 gcc_unreachable ();
49583 op2_l = op2_h = op2;
49584 switch (code)
49586 case MULT:
49587 /* Unpack data such that we've got a source byte in each low byte of
49588 each word. We don't care what goes into the high byte of each word.
49589 Rather than trying to get zero in there, most convenient is to let
49590 it be a copy of the low byte. */
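/* (With the low byte duplicated, each word of the interleaved operands
   holds 257 times the original byte; since 257 == 1 mod 256, the low
   byte of every word product is still the desired byte product,
   whatever lands in the high byte.)  */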
49591 op2_l = gen_reg_rtx (qimode);
49592 op2_h = gen_reg_rtx (qimode);
49593 emit_insn (gen_il (op2_l, op2, op2));
49594 emit_insn (gen_ih (op2_h, op2, op2));
49595 /* FALLTHRU */
49597 op1_l = gen_reg_rtx (qimode);
49598 op1_h = gen_reg_rtx (qimode);
49599 emit_insn (gen_il (op1_l, op1, op1));
49600 emit_insn (gen_ih (op1_h, op1, op1));
49601 full_interleave = qimode == V16QImode;
49602 break;
49604 case ASHIFT:
49605 case LSHIFTRT:
49606 uns_p = true;
49607 /* FALLTHRU */
49608 case ASHIFTRT:
49609 op1_l = gen_reg_rtx (himode);
49610 op1_h = gen_reg_rtx (himode);
49611 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49612 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49613 full_interleave = true;
49614 break;
49615 default:
49616 gcc_unreachable ();
49619 /* Perform the operation. */
49620 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49621 1, OPTAB_DIRECT);
49622 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49623 1, OPTAB_DIRECT);
49624 gcc_assert (res_l && res_h);
49626 /* Merge the data back into the right place. */
49627 d.target = dest;
49628 d.op0 = gen_lowpart (qimode, res_l);
49629 d.op1 = gen_lowpart (qimode, res_h);
49630 d.vmode = qimode;
49631 d.nelt = GET_MODE_NUNITS (qimode);
49632 d.one_operand_p = false;
49633 d.testing_p = false;
49635 if (full_interleave)
49637 /* For SSE2, we used a full interleave, so the desired
49638 results are in the even elements. */
49639 for (i = 0; i < 64; ++i)
49640 d.perm[i] = i * 2;
49642 else
49644 /* For AVX, the interleave used above was not cross-lane. So we
49645 extract the even elements, but with the second and third quarters
49646 swapped. Happily, that is even one insn shorter than plain even extraction. */
49647 for (i = 0; i < 64; ++i)
49648 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
49651 ok = ix86_expand_vec_perm_const_1 (&d);
49652 gcc_assert (ok);
49654 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49655 gen_rtx_fmt_ee (code, qimode, op1, op2));
49658 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49659 if op is CONST_VECTOR with all odd elements equal to their
49660 preceding element. */
49662 static bool
49663 const_vector_equal_evenodd_p (rtx op)
49665 machine_mode mode = GET_MODE (op);
49666 int i, nunits = GET_MODE_NUNITS (mode);
49667 if (GET_CODE (op) != CONST_VECTOR
49668 || nunits != CONST_VECTOR_NUNITS (op))
49669 return false;
49670 for (i = 0; i < nunits; i += 2)
49671 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
49672 return false;
49673 return true;
49676 void
49677 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49678 bool uns_p, bool odd_p)
49680 machine_mode mode = GET_MODE (op1);
49681 machine_mode wmode = GET_MODE (dest);
49682 rtx x;
49683 rtx orig_op1 = op1, orig_op2 = op2;
49685 if (!nonimmediate_operand (op1, mode))
49686 op1 = force_reg (mode, op1);
49687 if (!nonimmediate_operand (op2, mode))
49688 op2 = force_reg (mode, op2);
49690 /* We only play even/odd games with vectors of SImode. */
49691 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49693 /* If we're looking for the odd results, shift those members down to
49694 the even slots. For some cpus this is faster than a PSHUFD. */
49695 if (odd_p)
49697 /* For XOP use vpmacsdqh, but only for smult, as it is only
49698 signed. */
49699 if (TARGET_XOP && mode == V4SImode && !uns_p)
49701 x = force_reg (wmode, CONST0_RTX (wmode));
49702 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
49703 return;
49706 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49707 if (!const_vector_equal_evenodd_p (orig_op1))
49708 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49709 x, NULL, 1, OPTAB_DIRECT);
49710 if (!const_vector_equal_evenodd_p (orig_op2))
49711 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49712 x, NULL, 1, OPTAB_DIRECT);
49713 op1 = gen_lowpart (mode, op1);
49714 op2 = gen_lowpart (mode, op2);
49717 if (mode == V16SImode)
49719 if (uns_p)
49720 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49721 else
49722 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49724 else if (mode == V8SImode)
49726 if (uns_p)
49727 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49728 else
49729 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49731 else if (uns_p)
49732 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49733 else if (TARGET_SSE4_1)
49734 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49735 else
49737 rtx s1, s2, t0, t1, t2;
49739 /* The easiest way to implement this without PMULDQ is to go through
49740 the motions as if we are performing a full 64-bit multiply. With
49741 the exception that we need to do less shuffling of the elements. */
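/* Concretely, for signed A and B:
     A * B = uA * uB - ((A < 0 ? uB : 0) + (B < 0 ? uA : 0)) << 32 (mod 2^64).
   The GT compares below give all-ones masks for the negative operands;
   the widening multiply of such a mask by the other operand yields
   (2^32 - 1) * uB, which, shifted left by 32, is congruent to
   -(uB << 32) mod 2^64, so adding it in performs the subtraction.  */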
49743 /* Compute the sign-extension, aka highparts, of the two operands. */
49744 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49745 op1, pc_rtx, pc_rtx);
49746 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49747 op2, pc_rtx, pc_rtx);
49749 /* Multiply LO(A) * HI(B), and vice-versa. */
49750 t1 = gen_reg_rtx (wmode);
49751 t2 = gen_reg_rtx (wmode);
49752 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49753 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49755 /* Multiply LO(A) * LO(B). */
49756 t0 = gen_reg_rtx (wmode);
49757 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49759 /* Combine and shift the highparts into place. */
49760 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49761 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49762 1, OPTAB_DIRECT);
49764 /* Combine high and low parts. */
49765 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
49766 return;
49768 emit_insn (x);
49771 void
49772 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49773 bool uns_p, bool high_p)
49775 machine_mode wmode = GET_MODE (dest);
49776 machine_mode mode = GET_MODE (op1);
49777 rtx t1, t2, t3, t4, mask;
49779 switch (mode)
49781 case V4SImode:
49782 t1 = gen_reg_rtx (mode);
49783 t2 = gen_reg_rtx (mode);
49784 if (TARGET_XOP && !uns_p)
49786 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49787 shuffle the elements once so that all elements are in the right
49788 place for immediate use: { A C B D }. */
49789 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49790 const1_rtx, GEN_INT (3)));
49791 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49792 const1_rtx, GEN_INT (3)));
49794 else
49796 /* Put the elements into place for the multiply. */
49797 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49798 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49799 high_p = false;
49801 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49802 break;
49804 case V8SImode:
49805 /* Shuffle the elements between the lanes. After this we
49806 have { A B E F | C D G H } for each operand. */
49807 t1 = gen_reg_rtx (V4DImode);
49808 t2 = gen_reg_rtx (V4DImode);
49809 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49810 const0_rtx, const2_rtx,
49811 const1_rtx, GEN_INT (3)));
49812 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49813 const0_rtx, const2_rtx,
49814 const1_rtx, GEN_INT (3)));
49816 /* Shuffle the elements within the lanes. After this we
49817 have { A A B B | C C D D } or { E E F F | G G H H }. */
49818 t3 = gen_reg_rtx (V8SImode);
49819 t4 = gen_reg_rtx (V8SImode);
49820 mask = GEN_INT (high_p
49821 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49822 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
49823 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49824 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49826 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
49827 break;
49829 case V8HImode:
49830 case V16HImode:
49831 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49832 uns_p, OPTAB_DIRECT);
49833 t2 = expand_binop (mode,
49834 uns_p ? umul_highpart_optab : smul_highpart_optab,
49835 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49836 gcc_assert (t1 && t2);
49838 t3 = gen_reg_rtx (mode);
49839 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49840 emit_move_insn (dest, gen_lowpart (wmode, t3));
49841 break;
49843 case V16QImode:
49844 case V32QImode:
49845 case V32HImode:
49846 case V16SImode:
49847 case V64QImode:
49848 t1 = gen_reg_rtx (wmode);
49849 t2 = gen_reg_rtx (wmode);
49850 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49851 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49853 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
49854 break;
49856 default:
49857 gcc_unreachable ();
49861 void
49862 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49864 rtx res_1, res_2, res_3, res_4;
49866 res_1 = gen_reg_rtx (V4SImode);
49867 res_2 = gen_reg_rtx (V4SImode);
49868 res_3 = gen_reg_rtx (V2DImode);
49869 res_4 = gen_reg_rtx (V2DImode);
49870 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
49871 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
49873 /* Move the results in element 2 down to element 1; we don't care
49874 what goes in elements 2 and 3. Then we can merge the parts
49875 back together with an interleave.
49877 Note that two other sequences were tried:
49878 (1) Use interleaves at the start instead of psrldq, which allows
49879 us to use a single shufps to merge things back at the end.
49880 (2) Use shufps here to combine the two vectors, then pshufd to
49881 put the elements in the correct order.
49882 In both cases the cost of the reformatting stall was too high
49883 and the overall sequence slower. */
49885 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
49886 const0_rtx, const2_rtx,
49887 const0_rtx, const0_rtx));
49888 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
49889 const0_rtx, const2_rtx,
49890 const0_rtx, const0_rtx));
49891 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
49893 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
49896 void
49897 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
49899 machine_mode mode = GET_MODE (op0);
49900 rtx t1, t2, t3, t4, t5, t6;
49902 if (TARGET_AVX512DQ && mode == V8DImode)
49903 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
49904 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
49905 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
49906 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
49907 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
49908 else if (TARGET_XOP && mode == V2DImode)
49910 /* op1: A,B,C,D, op2: E,F,G,H */
49911 op1 = gen_lowpart (V4SImode, op1);
49912 op2 = gen_lowpart (V4SImode, op2);
49914 t1 = gen_reg_rtx (V4SImode);
49915 t2 = gen_reg_rtx (V4SImode);
49916 t3 = gen_reg_rtx (V2DImode);
49917 t4 = gen_reg_rtx (V2DImode);
49919 /* t1: B,A,D,C */
49920 emit_insn (gen_sse2_pshufd_1 (t1, op1,
49921 GEN_INT (1),
49922 GEN_INT (0),
49923 GEN_INT (3),
49924 GEN_INT (2)));
49926 /* t2: (B*E),(A*F),(D*G),(C*H) */
49927 emit_insn (gen_mulv4si3 (t2, t1, op2));
49929 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
49930 emit_insn (gen_xop_phadddq (t3, t2));
49932 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
49933 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
49935 /* Multiply lower parts and add all */
49936 t5 = gen_reg_rtx (V2DImode);
49937 emit_insn (gen_vec_widen_umult_even_v4si (t5,
49938 gen_lowpart (V4SImode, op1),
49939 gen_lowpart (V4SImode, op2)));
49940 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
49943 else
49945 machine_mode nmode;
49946 rtx (*umul) (rtx, rtx, rtx);
49948 if (mode == V2DImode)
49950 umul = gen_vec_widen_umult_even_v4si;
49951 nmode = V4SImode;
49953 else if (mode == V4DImode)
49955 umul = gen_vec_widen_umult_even_v8si;
49956 nmode = V8SImode;
49958 else if (mode == V8DImode)
49960 umul = gen_vec_widen_umult_even_v16si;
49961 nmode = V16SImode;
49963 else
49964 gcc_unreachable ();
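/* The full 64-bit products are computed as
     lo1 * lo2 + ((hi1 * lo2 + lo1 * hi2) << 32)  (mod 2^64)
   using only 32 x 32 -> 64 bit unsigned widening multiplies; the
   hi1 * hi2 term vanishes modulo 2^64.  */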
49967 /* Multiply low parts. */
49968 t1 = gen_reg_rtx (mode);
49969 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
49971 /* Shift input vectors right 32 bits so we can multiply high parts. */
49972 t6 = GEN_INT (32);
49973 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
49974 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
49976 /* Multiply high parts by low parts. */
49977 t4 = gen_reg_rtx (mode);
49978 t5 = gen_reg_rtx (mode);
49979 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
49980 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
49982 /* Combine and shift the highparts back. */
49983 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
49984 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
49986 /* Combine high and low parts. */
49987 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
49990 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49991 gen_rtx_MULT (mode, op1, op2));
49994 /* Return 1 if control transfer instruction INSN
49995 should be encoded with bnd prefix.
49996 If insn is NULL then return 1 when control
49997 transfer instructions should be prefixed with
49998 bnd by default for current function. */
50000 bool
50001 ix86_bnd_prefixed_insn_p (rtx insn)
50003 /* For call insns check special flag. */
50004 if (insn && CALL_P (insn))
50006 rtx call = get_call_rtx_from (insn);
50007 if (call)
50008 return CALL_EXPR_WITH_BOUNDS_P (call);
50011 /* All other insns are prefixed only if function is instrumented. */
50012 return chkp_function_instrumented_p (current_function_decl);
50015 /* Calculate integer abs() using only SSE2 instructions. */
50017 void
50018 ix86_expand_sse2_abs (rtx target, rtx input)
50020 machine_mode mode = GET_MODE (target);
50021 rtx tmp0, tmp1, x;
50023 switch (mode)
50025 /* For 32-bit signed integer X, the best way to calculate the absolute
50026 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
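/* E.g. for X = -5: X >> 31 = -1, -1 ^ -5 = 4, and 4 - (-1) = 5;
   for non-negative X the shift gives 0 and X is left unchanged.  */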
50027 case V4SImode:
50028 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
50029 GEN_INT (GET_MODE_BITSIZE
50030 (GET_MODE_INNER (mode)) - 1),
50031 NULL, 0, OPTAB_DIRECT);
50032 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
50033 NULL, 0, OPTAB_DIRECT);
50034 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
50035 target, 0, OPTAB_DIRECT);
50036 break;
50038 /* For 16-bit signed integer X, the best way to calculate the absolute
50039 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
50040 case V8HImode:
50041 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50043 x = expand_simple_binop (mode, SMAX, tmp0, input,
50044 target, 0, OPTAB_DIRECT);
50045 break;
50047 /* For 8-bit signed integer X, the best way to calculate the absolute
50048 value of X is min ((unsigned char) X, (unsigned char) (-X)),
50049 as SSE2 provides the PMINUB insn. */
50050 case V16QImode:
50051 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50053 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
50054 target, 0, OPTAB_DIRECT);
50055 break;
50057 default:
50058 gcc_unreachable ();
50061 if (x != target)
50062 emit_move_insn (target, x);
50065 /* Expand an insert into a vector register through pinsr insn.
50066 Return true if successful. */
50068 bool
50069 ix86_expand_pinsr (rtx *operands)
50071 rtx dst = operands[0];
50072 rtx src = operands[3];
50074 unsigned int size = INTVAL (operands[1]);
50075 unsigned int pos = INTVAL (operands[2]);
50077 if (GET_CODE (dst) == SUBREG)
50079 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50080 dst = SUBREG_REG (dst);
50083 if (GET_CODE (src) == SUBREG)
50084 src = SUBREG_REG (src);
50086 switch (GET_MODE (dst))
50088 case V16QImode:
50089 case V8HImode:
50090 case V4SImode:
50091 case V2DImode:
50093 machine_mode srcmode, dstmode;
50094 rtx (*pinsr)(rtx, rtx, rtx, rtx);
50096 srcmode = mode_for_size (size, MODE_INT, 0);
50098 switch (srcmode)
50100 case QImode:
50101 if (!TARGET_SSE4_1)
50102 return false;
50103 dstmode = V16QImode;
50104 pinsr = gen_sse4_1_pinsrb;
50105 break;
50107 case HImode:
50108 if (!TARGET_SSE2)
50109 return false;
50110 dstmode = V8HImode;
50111 pinsr = gen_sse2_pinsrw;
50112 break;
50114 case SImode:
50115 if (!TARGET_SSE4_1)
50116 return false;
50117 dstmode = V4SImode;
50118 pinsr = gen_sse4_1_pinsrd;
50119 break;
50121 case DImode:
50122 gcc_assert (TARGET_64BIT);
50123 if (!TARGET_SSE4_1)
50124 return false;
50125 dstmode = V2DImode;
50126 pinsr = gen_sse4_1_pinsrq;
50127 break;
50129 default:
50130 return false;
50133 rtx d = dst;
50134 if (GET_MODE (dst) != dstmode)
50135 d = gen_reg_rtx (dstmode);
50136 src = gen_lowpart (srcmode, src);
50138 pos /= size;
50140 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
50141 GEN_INT (1 << pos)));
50142 if (d != dst)
50143 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50144 return true;
50147 default:
50148 return false;
50152 /* This function returns the calling-ABI-specific va_list type node.
50153 It returns the FNDECL-specific va_list type. */
50155 static tree
50156 ix86_fn_abi_va_list (tree fndecl)
50158 if (!TARGET_64BIT)
50159 return va_list_type_node;
50160 gcc_assert (fndecl != NULL_TREE);
50162 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50163 return ms_va_list_type_node;
50164 else
50165 return sysv_va_list_type_node;
50168 /* Returns the canonical va_list type specified by TYPE. If there
50169 is no valid TYPE provided, it returns NULL_TREE. */
50171 static tree
50172 ix86_canonical_va_list_type (tree type)
50174 tree wtype, htype;
50176 /* Resolve references and pointers to va_list type. */
50177 if (TREE_CODE (type) == MEM_REF)
50178 type = TREE_TYPE (type);
50179 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
50180 type = TREE_TYPE (type);
50181 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50182 type = TREE_TYPE (type);
50184 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50186 wtype = va_list_type_node;
50187 gcc_assert (wtype != NULL_TREE);
50188 htype = type;
50189 if (TREE_CODE (wtype) == ARRAY_TYPE)
50191 /* If va_list is an array type, the argument may have decayed
50192 to a pointer type, e.g. by being passed to another function.
50193 In that case, unwrap both types so that we can compare the
50194 underlying records. */
50195 if (TREE_CODE (htype) == ARRAY_TYPE
50196 || POINTER_TYPE_P (htype))
50198 wtype = TREE_TYPE (wtype);
50199 htype = TREE_TYPE (htype);
50202 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50203 return va_list_type_node;
50204 wtype = sysv_va_list_type_node;
50205 gcc_assert (wtype != NULL_TREE);
50206 htype = type;
50207 if (TREE_CODE (wtype) == ARRAY_TYPE)
50209 /* If va_list is an array type, the argument may have decayed
50210 to a pointer type, e.g. by being passed to another function.
50211 In that case, unwrap both types so that we can compare the
50212 underlying records. */
50213 if (TREE_CODE (htype) == ARRAY_TYPE
50214 || POINTER_TYPE_P (htype))
50216 wtype = TREE_TYPE (wtype);
50217 htype = TREE_TYPE (htype);
50220 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50221 return sysv_va_list_type_node;
50222 wtype = ms_va_list_type_node;
50223 gcc_assert (wtype != NULL_TREE);
50224 htype = type;
50225 if (TREE_CODE (wtype) == ARRAY_TYPE)
50227 /* If va_list is an array type, the argument may have decayed
50228 to a pointer type, e.g. by being passed to another function.
50229 In that case, unwrap both types so that we can compare the
50230 underlying records. */
50231 if (TREE_CODE (htype) == ARRAY_TYPE
50232 || POINTER_TYPE_P (htype))
50234 wtype = TREE_TYPE (wtype);
50235 htype = TREE_TYPE (htype);
50238 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50239 return ms_va_list_type_node;
50240 return NULL_TREE;
50242 return std_canonical_va_list_type (type);
50245 /* Iterate through the target-specific builtin types for va_list.
50246 IDX denotes the iterator, *PTREE is set to the result type of
50247 the va_list builtin, and *PNAME to its internal type.
50248 Returns zero if there is no element for this index, otherwise
50249 IDX should be increased upon the next call.
50250 Note, do not iterate a base builtin's name like __builtin_va_list.
50251 Used from c_common_nodes_and_builtins. */
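/* Rough usage sketch (the exact loop lives in c_common_nodes_and_builtins):
     for (idx = 0; targetm.enum_va_list_p (idx, &pname, &ptree); idx++)
       ... register the builtin type PTREE under the name PNAME ...  */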
50253 static int
50254 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50256 if (TARGET_64BIT)
50258 switch (idx)
50260 default:
50261 break;
50263 case 0:
50264 *ptree = ms_va_list_type_node;
50265 *pname = "__builtin_ms_va_list";
50266 return 1;
50268 case 1:
50269 *ptree = sysv_va_list_type_node;
50270 *pname = "__builtin_sysv_va_list";
50271 return 1;
50275 return 0;
50278 #undef TARGET_SCHED_DISPATCH
50279 #define TARGET_SCHED_DISPATCH has_dispatch
50280 #undef TARGET_SCHED_DISPATCH_DO
50281 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50282 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50283 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50284 #undef TARGET_SCHED_REORDER
50285 #define TARGET_SCHED_REORDER ix86_sched_reorder
50286 #undef TARGET_SCHED_ADJUST_PRIORITY
50287 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50288 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50289 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50290 ix86_dependencies_evaluation_hook
50292 /* The size of the dispatch window is the total number of bytes of
50293 object code allowed in a window. */
50294 #define DISPATCH_WINDOW_SIZE 16
50296 /* Number of dispatch windows considered for scheduling. */
50297 #define MAX_DISPATCH_WINDOWS 3
50299 /* Maximum number of instructions in a window. */
50300 #define MAX_INSN 4
50302 /* Maximum number of immediate operands in a window. */
50303 #define MAX_IMM 4
50305 /* Maximum number of immediate bits allowed in a window. */
50306 #define MAX_IMM_SIZE 128
50308 /* Maximum number of 32 bit immediates allowed in a window. */
50309 #define MAX_IMM_32 4
50311 /* Maximum number of 64 bit immediates allowed in a window. */
50312 #define MAX_IMM_64 2
50314 /* Maximum total of loads or prefetches allowed in a window. */
50315 #define MAX_LOAD 2
50317 /* Maximum total of stores allowed in a window. */
50318 #define MAX_STORE 1
50320 #undef BIG
50321 #define BIG 100
50324 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50325 enum dispatch_group {
50326 disp_no_group = 0,
50327 disp_load,
50328 disp_store,
50329 disp_load_store,
50330 disp_prefetch,
50331 disp_imm,
50332 disp_imm_32,
50333 disp_imm_64,
50334 disp_branch,
50335 disp_cmp,
50336 disp_jcc,
50337 disp_last
50340 /* Number of allowable groups in a dispatch window. It is an array
50341 indexed by dispatch_group enum. 100 is used as a big number,
50342 because the number of these kinds of operations does not have any
50343 effect on the dispatch window, but we need them for other reasons in
50344 the table. */
50345 static unsigned int num_allowable_groups[disp_last] = {
50346 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
50349 char group_name[disp_last + 1][16] = {
50350 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50351 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50352 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50355 /* Instruction path. */
50356 enum insn_path {
50357 no_path = 0,
50358 path_single, /* Single micro op. */
50359 path_double, /* Double micro op. */
50360 path_multi, /* Instructions with more than 2 micro ops. */
50361 last_path
50364 /* sched_insn_info defines a window to the instructions scheduled in
50365 the basic block. It contains a pointer to the insn_info table and
50366 the instruction scheduled.
50368 Windows are allocated for each basic block and are linked
50369 together. */
50370 typedef struct sched_insn_info_s {
50371 rtx insn;
50372 enum dispatch_group group;
50373 enum insn_path path;
50374 int byte_len;
50375 int imm_bytes;
50376 } sched_insn_info;
50378 /* Linked list of dispatch windows. This is a two-way list of
50379 dispatch windows of a basic block. It contains information about
50380 the number of uops in the window and the total number of
50381 instructions and of bytes in the object code for this dispatch
50382 window. */
50383 typedef struct dispatch_windows_s {
50384 int num_insn; /* Number of insn in the window. */
50385 int num_uops; /* Number of uops in the window. */
50386 int window_size; /* Number of bytes in the window. */
50387 int window_num; /* Window number, either 0 or 1. */
50388 int num_imm; /* Number of immediates in the window. */
50389 int num_imm_32; /* Number of 32 bit immediates in the window. */
50390 int num_imm_64; /* Number of 64 bit immediates in the window. */
50391 int imm_size; /* Total size in bytes of immediates in the window. */
50392 int num_loads; /* Total memory loads in the window. */
50393 int num_stores; /* Total memory stores in the window. */
50394 int violation; /* Violation exists in window. */
50395 sched_insn_info *window; /* Pointer to the window. */
50396 struct dispatch_windows_s *next;
50397 struct dispatch_windows_s *prev;
50398 } dispatch_windows;
50400 /* Immediate values used in an insn. */
50401 typedef struct imm_info_s
50403 int imm;
50404 int imm32;
50405 int imm64;
50406 } imm_info;
50408 static dispatch_windows *dispatch_window_list;
50409 static dispatch_windows *dispatch_window_list1;
50411 /* Get dispatch group of insn. */
50413 static enum dispatch_group
50414 get_mem_group (rtx_insn *insn)
50416 enum attr_memory memory;
50418 if (INSN_CODE (insn) < 0)
50419 return disp_no_group;
50420 memory = get_attr_memory (insn);
50421 if (memory == MEMORY_STORE)
50422 return disp_store;
50424 if (memory == MEMORY_LOAD)
50425 return disp_load;
50427 if (memory == MEMORY_BOTH)
50428 return disp_load_store;
50430 return disp_no_group;
50433 /* Return true if insn is a compare instruction. */
50435 static bool
50436 is_cmp (rtx_insn *insn)
50438 enum attr_type type;
50440 type = get_attr_type (insn);
50441 return (type == TYPE_TEST
50442 || type == TYPE_ICMP
50443 || type == TYPE_FCMP
50444 || GET_CODE (PATTERN (insn)) == COMPARE);
50447 /* Return true if a dispatch violation was encountered. */
50449 static bool
50450 dispatch_violation (void)
50452 if (dispatch_window_list->next)
50453 return dispatch_window_list->next->violation;
50454 return dispatch_window_list->violation;
50457 /* Return true if insn is a branch instruction. */
50459 static bool
50460 is_branch (rtx_insn *insn)
50462 return (CALL_P (insn) || JUMP_P (insn));
50465 /* Return true if insn is a prefetch instruction. */
50467 static bool
50468 is_prefetch (rtx_insn *insn)
50470 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50473 /* This function initializes a dispatch window and the list container holding a
50474 pointer to the window. */
50476 static void
50477 init_window (int window_num)
50479 int i;
50480 dispatch_windows *new_list;
50482 if (window_num == 0)
50483 new_list = dispatch_window_list;
50484 else
50485 new_list = dispatch_window_list1;
50487 new_list->num_insn = 0;
50488 new_list->num_uops = 0;
50489 new_list->window_size = 0;
50490 new_list->next = NULL;
50491 new_list->prev = NULL;
50492 new_list->window_num = window_num;
50493 new_list->num_imm = 0;
50494 new_list->num_imm_32 = 0;
50495 new_list->num_imm_64 = 0;
50496 new_list->imm_size = 0;
50497 new_list->num_loads = 0;
50498 new_list->num_stores = 0;
50499 new_list->violation = false;
50501 for (i = 0; i < MAX_INSN; i++)
50503 new_list->window[i].insn = NULL;
50504 new_list->window[i].group = disp_no_group;
50505 new_list->window[i].path = no_path;
50506 new_list->window[i].byte_len = 0;
50507 new_list->window[i].imm_bytes = 0;
50509 return;
50512 /* This function allocates and initializes a dispatch window and the
50513 list container holding a pointer to the window. */
50515 static dispatch_windows *
50516 allocate_window (void)
50518 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
50519 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50521 return new_list;
50524 /* This routine initializes the dispatch scheduling information. It
50525 initiates building dispatch scheduler tables and constructs the
50526 first dispatch window. */
50528 static void
50529 init_dispatch_sched (void)
50531 /* Allocate a dispatch list and a window. */
50532 dispatch_window_list = allocate_window ();
50533 dispatch_window_list1 = allocate_window ();
50534 init_window (0);
50535 init_window (1);
50538 /* This function returns true if a branch is detected. End of a basic block
50539 does not have to be a branch, but here we assume only branches end a
50540 window. */
50542 static bool
50543 is_end_basic_block (enum dispatch_group group)
50545 return group == disp_branch;
50548 /* This function is called when the end of a window processing is reached. */
50550 static void
50551 process_end_window (void)
50553 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
50554 if (dispatch_window_list->next)
50556 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
50557 gcc_assert (dispatch_window_list->window_size
50558 + dispatch_window_list1->window_size <= 48);
50559 init_window (1);
50561 init_window (0);
50564 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
50565 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
50566 for 48 bytes of instructions. Note that these windows are not dispatch
50567 windows whose size is DISPATCH_WINDOW_SIZE. */
50569 static dispatch_windows *
50570 allocate_next_window (int window_num)
50572 if (window_num == 0)
50574 if (dispatch_window_list->next)
50575 init_window (1);
50576 init_window (0);
50577 return dispatch_window_list;
50580 dispatch_window_list->next = dispatch_window_list1;
50581 dispatch_window_list1->prev = dispatch_window_list;
50583 return dispatch_window_list1;
50586 /* Compute number of immediate operands of an instruction. */
50588 static void
50589 find_constant (rtx in_rtx, imm_info *imm_values)
50591 if (INSN_P (in_rtx))
50592 in_rtx = PATTERN (in_rtx);
50593 subrtx_iterator::array_type array;
50594 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
50595 if (const_rtx x = *iter)
50596 switch (GET_CODE (x))
50598 case CONST:
50599 case SYMBOL_REF:
50600 case CONST_INT:
50601 (imm_values->imm)++;
50602 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
50603 (imm_values->imm32)++;
50604 else
50605 (imm_values->imm64)++;
50606 break;
50608 case CONST_DOUBLE:
50609 case CONST_WIDE_INT:
50610 (imm_values->imm)++;
50611 (imm_values->imm64)++;
50612 break;
50614 case CODE_LABEL:
50615 if (LABEL_KIND (x) == LABEL_NORMAL)
50617 (imm_values->imm)++;
50618 (imm_values->imm32)++;
50620 break;
50622 default:
50623 break;
50627 /* Return total size of immediate operands of an instruction along with number
50628 of corresponding immediate operands. It initializes its parameters to zero
50629 before calling FIND_CONSTANT.
50630 INSN is the input instruction. IMM is the total number of immediates.
50631 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
50632 bit immediates. */
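/* For example, an insn with one 32-bit and one 64-bit immediate sets *IMM
   to 2, *IMM32 to 1 and *IMM64 to 1, and returns 1*4 + 1*8 == 12 bytes. */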
50634 static int
50635 get_num_immediates (rtx_insn *insn, int *imm, int *imm32, int *imm64)
50637 imm_info imm_values = {0, 0, 0};
50639 find_constant (insn, &imm_values);
50640 *imm = imm_values.imm;
50641 *imm32 = imm_values.imm32;
50642 *imm64 = imm_values.imm64;
50643 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
50646 /* This function indicates whether an instruction has any immediate
50647 operands. */
50649 static bool
50650 has_immediate (rtx_insn *insn)
50652 int num_imm_operand;
50653 int num_imm32_operand;
50654 int num_imm64_operand;
50656 if (insn)
50657 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50658 &num_imm64_operand);
50659 return false;
50662 /* Return single or double path for instructions. */
50664 static enum insn_path
50665 get_insn_path (rtx_insn *insn)
50667 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
50669 if ((int)path == 0)
50670 return path_single;
50672 if ((int)path == 1)
50673 return path_double;
50675 return path_multi;
50678 /* Return insn dispatch group. */
50680 static enum dispatch_group
50681 get_insn_group (rtx_insn *insn)
50683 enum dispatch_group group = get_mem_group (insn);
50684 if (group)
50685 return group;
50687 if (is_branch (insn))
50688 return disp_branch;
50690 if (is_cmp (insn))
50691 return disp_cmp;
50693 if (has_immediate (insn))
50694 return disp_imm;
50696 if (is_prefetch (insn))
50697 return disp_prefetch;
50699 return disp_no_group;
50702 /* Count number of GROUP restricted instructions in a dispatch
50703 window WINDOW_LIST. */
50705 static int
50706 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
50708 enum dispatch_group group = get_insn_group (insn);
50709 int imm_size;
50710 int num_imm_operand;
50711 int num_imm32_operand;
50712 int num_imm64_operand;
50714 if (group == disp_no_group)
50715 return 0;
50717 if (group == disp_imm)
50719 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50720 &num_imm64_operand);
50721 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
50722 || num_imm_operand + window_list->num_imm > MAX_IMM
50723 || (num_imm32_operand > 0
50724 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
50725 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
50726 || (num_imm64_operand > 0
50727 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
50728 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
50729 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
50730 && num_imm64_operand > 0
50731 && ((window_list->num_imm_64 > 0
50732 && window_list->num_insn >= 2)
50733 || window_list->num_insn >= 3)))
50734 return BIG;
50736 return 1;
50739 if ((group == disp_load_store
50740 && (window_list->num_loads >= MAX_LOAD
50741 || window_list->num_stores >= MAX_STORE))
50742 || ((group == disp_load
50743 || group == disp_prefetch)
50744 && window_list->num_loads >= MAX_LOAD)
50745 || (group == disp_store
50746 && window_list->num_stores >= MAX_STORE))
50747 return BIG;
50749 return 1;
50752 /* This function returns true if insn satisfies dispatch rules on the
50753 last window scheduled. */
50755 static bool
50756 fits_dispatch_window (rtx_insn *insn)
50758 dispatch_windows *window_list = dispatch_window_list;
50759 dispatch_windows *window_list_next = dispatch_window_list->next;
50760 unsigned int num_restrict;
50761 enum dispatch_group group = get_insn_group (insn);
50762 enum insn_path path = get_insn_path (insn);
50763 int sum;
50765 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
50766 instructions should be given the lowest priority in the
50767 scheduling process in the Haifa scheduler to make sure they will be
50768 scheduled in the same dispatch window as the reference to them. */
50769 if (group == disp_jcc || group == disp_cmp)
50770 return false;
50772 /* Check nonrestricted. */
50773 if (group == disp_no_group || group == disp_branch)
50774 return true;
50776 /* Get last dispatch window. */
50777 if (window_list_next)
50778 window_list = window_list_next;
50780 if (window_list->window_num == 1)
50782 sum = window_list->prev->window_size + window_list->window_size;
50784 if (sum == 32
50785 || (min_insn_size (insn) + sum) >= 48)
50786 /* Window 1 is full. Go for next window. */
50787 return true;
50790 num_restrict = count_num_restricted (insn, window_list);
50792 if (num_restrict > num_allowable_groups[group])
50793 return false;
50795 /* See if it fits in the first window. */
50796 if (window_list->window_num == 0)
50798 /* The first window should have only single and double path
50799 uops. */
50800 if (path == path_double
50801 && (window_list->num_uops + 2) > MAX_INSN)
50802 return false;
50803 else if (path != path_single)
50804 return false;
50806 return true;
50809 /* Add an instruction INSN with NUM_UOPS micro-operations to the
50810 dispatch window WINDOW_LIST. */
50812 static void
50813 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
50815 int byte_len = min_insn_size (insn);
50816 int num_insn = window_list->num_insn;
50817 int imm_size;
50818 sched_insn_info *window = window_list->window;
50819 enum dispatch_group group = get_insn_group (insn);
50820 enum insn_path path = get_insn_path (insn);
50821 int num_imm_operand;
50822 int num_imm32_operand;
50823 int num_imm64_operand;
50825 if (!window_list->violation && group != disp_cmp
50826 && !fits_dispatch_window (insn))
50827 window_list->violation = true;
50829 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50830 &num_imm64_operand);
50832 /* Initialize window with new instruction. */
50833 window[num_insn].insn = insn;
50834 window[num_insn].byte_len = byte_len;
50835 window[num_insn].group = group;
50836 window[num_insn].path = path;
50837 window[num_insn].imm_bytes = imm_size;
50839 window_list->window_size += byte_len;
50840 window_list->num_insn = num_insn + 1;
50841 window_list->num_uops = window_list->num_uops + num_uops;
50842 window_list->imm_size += imm_size;
50843 window_list->num_imm += num_imm_operand;
50844 window_list->num_imm_32 += num_imm32_operand;
50845 window_list->num_imm_64 += num_imm64_operand;
50847 if (group == disp_store)
50848 window_list->num_stores += 1;
50849 else if (group == disp_load
50850 || group == disp_prefetch)
50851 window_list->num_loads += 1;
50852 else if (group == disp_load_store)
50854 window_list->num_stores += 1;
50855 window_list->num_loads += 1;
50859 /* Adds a scheduled instruction, INSN, to the current dispatch window.
50860 If the total bytes of instructions or the number of instructions in
50861 the window exceed the allowable limits, it allocates a new window.
50863 static void
50864 add_to_dispatch_window (rtx_insn *insn)
50866 int byte_len;
50867 dispatch_windows *window_list;
50868 dispatch_windows *next_list;
50869 dispatch_windows *window0_list;
50870 enum insn_path path;
50871 enum dispatch_group insn_group;
50872 bool insn_fits;
50873 int num_insn;
50874 int num_uops;
50875 int window_num;
50876 int insn_num_uops;
50877 int sum;
50879 if (INSN_CODE (insn) < 0)
50880 return;
50882 byte_len = min_insn_size (insn);
50883 window_list = dispatch_window_list;
50884 next_list = window_list->next;
50885 path = get_insn_path (insn);
50886 insn_group = get_insn_group (insn);
50888 /* Get the last dispatch window. */
50889 if (next_list)
50890 window_list = dispatch_window_list->next;
50892 if (path == path_single)
50893 insn_num_uops = 1;
50894 else if (path == path_double)
50895 insn_num_uops = 2;
50896 else
50897 insn_num_uops = (int) path;
50899 /* If current window is full, get a new window.
50900 Window number zero is full, if MAX_INSN uops are scheduled in it.
50901 Window number one is full, if window zero's bytes plus window
50902 one's bytes is 32, or if the bytes of the new instruction added
50903 to the total makes it greater than 48, or if it already has MAX_INSN
50904 instructions in it. */
50905 num_insn = window_list->num_insn;
50906 num_uops = window_list->num_uops;
50907 window_num = window_list->window_num;
50908 insn_fits = fits_dispatch_window (insn);
50910 if (num_insn >= MAX_INSN
50911 || num_uops + insn_num_uops > MAX_INSN
50912 || !(insn_fits))
50914 window_num = ~window_num & 1;
50915 window_list = allocate_next_window (window_num);
50918 if (window_num == 0)
50920 add_insn_window (insn, window_list, insn_num_uops);
50921 if (window_list->num_insn >= MAX_INSN
50922 && insn_group == disp_branch)
50924 process_end_window ();
50925 return;
50928 else if (window_num == 1)
50930 window0_list = window_list->prev;
50931 sum = window0_list->window_size + window_list->window_size;
50932 if (sum == 32
50933 || (byte_len + sum) >= 48)
50935 process_end_window ();
50936 window_list = dispatch_window_list;
50939 add_insn_window (insn, window_list, insn_num_uops);
50941 else
50942 gcc_unreachable ();
50944 if (is_end_basic_block (insn_group))
50946 /* End of basic block is reached; do end-of-basic-block processing. */
50947 process_end_window ();
50948 return;
50952 /* Print the dispatch window, WINDOW_NUM, to FILE. */
50954 DEBUG_FUNCTION static void
50955 debug_dispatch_window_file (FILE *file, int window_num)
50957 dispatch_windows *list;
50958 int i;
50960 if (window_num == 0)
50961 list = dispatch_window_list;
50962 else
50963 list = dispatch_window_list1;
50965 fprintf (file, "Window #%d:\n", list->window_num);
50966 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
50967 list->num_insn, list->num_uops, list->window_size);
50968 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50969 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
50971 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
50972 list->num_stores);
50973 fprintf (file, " insn info:\n");
50975 for (i = 0; i < MAX_INSN; i++)
50977 if (!list->window[i].insn)
50978 break;
50979 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
50980 i, group_name[list->window[i].group],
50981 i, (void *)list->window[i].insn,
50982 i, list->window[i].path,
50983 i, list->window[i].byte_len,
50984 i, list->window[i].imm_bytes);
50988 /* Print to stdout a dispatch window. */
50990 DEBUG_FUNCTION void
50991 debug_dispatch_window (int window_num)
50993 debug_dispatch_window_file (stdout, window_num);
50996 /* Print INSN dispatch information to FILE. */
50998 DEBUG_FUNCTION static void
50999 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
51001 int byte_len;
51002 enum insn_path path;
51003 enum dispatch_group group;
51004 int imm_size;
51005 int num_imm_operand;
51006 int num_imm32_operand;
51007 int num_imm64_operand;
51009 if (INSN_CODE (insn) < 0)
51010 return;
51012 byte_len = min_insn_size (insn);
51013 path = get_insn_path (insn);
51014 group = get_insn_group (insn);
51015 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51016 &num_imm64_operand);
51018 fprintf (file, " insn info:\n");
51019 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
51020 group_name[group], path, byte_len);
51021 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51022 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
51025 /* Print to STDOUT the status of the ready list with respect to
51026 dispatch windows. */
51028 DEBUG_FUNCTION void
51029 debug_ready_dispatch (void)
51031 int i;
51032 int no_ready = number_in_ready ();
51034 fprintf (stdout, "Number of ready: %d\n", no_ready);
51036 for (i = 0; i < no_ready; i++)
51037 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
51040 /* This routine is the driver of the dispatch scheduler. */
51042 static void
51043 do_dispatch (rtx_insn *insn, int mode)
51045 if (mode == DISPATCH_INIT)
51046 init_dispatch_sched ();
51047 else if (mode == ADD_TO_DISPATCH_WINDOW)
51048 add_to_dispatch_window (insn);
51051 /* Return TRUE if Dispatch Scheduling is supported. */
51053 static bool
51054 has_dispatch (rtx_insn *insn, int action)
51056 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
51057 && flag_dispatch_scheduler)
51058 switch (action)
51060 default:
51061 return false;
51063 case IS_DISPATCH_ON:
51064 return true;
51065 break;
51067 case IS_CMP:
51068 return is_cmp (insn);
51070 case DISPATCH_VIOLATION:
51071 return dispatch_violation ();
51073 case FITS_DISPATCH_WINDOW:
51074 return fits_dispatch_window (insn);
51077 return false;
51080 /* Implementation of reassociation_width target hook used by
51081 reassoc phase to identify parallelism level in reassociated
51082 tree. The statement's tree_code is passed in OPC. The arguments' type
51083 is passed in MODE.
51085 Currently parallel reassociation is enabled for Atom
51086 processors only and we set reassociation width to be 2
51087 because Atom may issue up to 2 instructions per cycle.
51089 Return value should be fixed if parallel reassociation is
51090 enabled for other processors. */
51092 static int
51093 ix86_reassociation_width (unsigned int, machine_mode mode)
51095 /* Vector part. */
51096 if (VECTOR_MODE_P (mode))
51098 if (TARGET_VECTOR_PARALLEL_EXECUTION)
51099 return 2;
51100 else
51101 return 1;
51104 /* Scalar part. */
51105 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51106 return 2;
51107 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
51108 return 2;
51109 else
51110 return 1;
51113 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51114 place emms and femms instructions. */
51116 static machine_mode
51117 ix86_preferred_simd_mode (machine_mode mode)
51119 if (!TARGET_SSE)
51120 return word_mode;
51122 switch (mode)
51124 case QImode:
51125 return TARGET_AVX512BW ? V64QImode :
51126 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51127 case HImode:
51128 return TARGET_AVX512BW ? V32HImode :
51129 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51130 case SImode:
51131 return TARGET_AVX512F ? V16SImode :
51132 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51133 case DImode:
51134 return TARGET_AVX512F ? V8DImode :
51135 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51137 case SFmode:
51138 if (TARGET_AVX512F)
51139 return V16SFmode;
51140 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51141 return V8SFmode;
51142 else
51143 return V4SFmode;
51145 case DFmode:
51146 if (!TARGET_VECTORIZE_DOUBLE)
51147 return word_mode;
51148 else if (TARGET_AVX512F)
51149 return V8DFmode;
51150 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51151 return V4DFmode;
51152 else if (TARGET_SSE2)
51153 return V2DFmode;
51154 /* FALLTHRU */
51156 default:
51157 return word_mode;
51161 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51162 vectors. If AVX512F is enabled then try vectorizing with 512bit,
51163 256bit and 128bit vectors. */
51165 static unsigned int
51166 ix86_autovectorize_vector_sizes (void)
51168 return TARGET_AVX512F ? 64 | 32 | 16 :
51169 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
51174 /* Return class of registers which could be used for pseudo of MODE
51175 and of class RCLASS for spilling instead of memory. Return NO_REGS
51176 if it is not possible or non-profitable. */
51177 static reg_class_t
51178 ix86_spill_class (reg_class_t rclass, machine_mode mode)
51180 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51181 && (mode == SImode || (TARGET_64BIT && mode == DImode))
51182 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51183 return ALL_SSE_REGS;
51184 return NO_REGS;
51187 /* Implement targetm.vectorize.init_cost. */
51189 static void *
51190 ix86_init_cost (struct loop *)
51192 unsigned *cost = XNEWVEC (unsigned, 3);
51193 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51194 return cost;
51197 /* Implement targetm.vectorize.add_stmt_cost. */
51199 static unsigned
51200 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51201 struct _stmt_vec_info *stmt_info, int misalign,
51202 enum vect_cost_model_location where)
51204 unsigned *cost = (unsigned *) data;
51205 unsigned retval = 0;
51207 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51208 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51210 /* Statements in an inner loop relative to the loop being
51211 vectorized are weighted more heavily. The value here is
51212 arbitrary and could potentially be improved with analysis. */
51213 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51214 count *= 50; /* FIXME. */
51216 retval = (unsigned) (count * stmt_cost);
51218 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
51219 for Silvermont, as it has an out-of-order integer pipeline and can execute
51220 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
51221 if (TARGET_SILVERMONT || TARGET_INTEL)
51222 if (stmt_info && stmt_info->stmt)
51224 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51225 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51226 retval = (retval * 17) / 10;
51229 cost[where] += retval;
51231 return retval;
51234 /* Implement targetm.vectorize.finish_cost. */
51236 static void
51237 ix86_finish_cost (void *data, unsigned *prologue_cost,
51238 unsigned *body_cost, unsigned *epilogue_cost)
51240 unsigned *cost = (unsigned *) data;
51241 *prologue_cost = cost[vect_prologue];
51242 *body_cost = cost[vect_body];
51243 *epilogue_cost = cost[vect_epilogue];
51246 /* Implement targetm.vectorize.destroy_cost_data. */
51248 static void
51249 ix86_destroy_cost_data (void *data)
51251 free (data);
51254 /* Validate target specific memory model bits in VAL. */
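/* For example, MEMMODEL_ACQUIRE | IX86_HLE_ACQUIRE is returned unchanged,
   while MEMMODEL_RELEASE | IX86_HLE_ACQUIRE is warned about and rewritten
   to MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE by the checks below. */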
51256 static unsigned HOST_WIDE_INT
51257 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51259 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
51260 bool strong;
51262 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51263 |MEMMODEL_MASK)
51264 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51266 warning (OPT_Winvalid_memory_model,
51267 "Unknown architecture specific memory model");
51268 return MEMMODEL_SEQ_CST;
51270 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
51271 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
51273 warning (OPT_Winvalid_memory_model,
51274 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51275 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51277 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
51279 warning (OPT_Winvalid_memory_model,
51280 "HLE_RELEASE not used with RELEASE or stronger memory model");
51281 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51283 return val;
51286 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51287 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51288 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51289 or number of vecsize_mangle variants that should be emitted. */
51291 static int
51292 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51293 struct cgraph_simd_clone *clonei,
51294 tree base_type, int num)
51296 int ret = 1;
51298 if (clonei->simdlen
51299 && (clonei->simdlen < 2
51300 || clonei->simdlen > 16
51301 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51303 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51304 "unsupported simdlen %d", clonei->simdlen);
51305 return 0;
51308 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51309 if (TREE_CODE (ret_type) != VOID_TYPE)
51310 switch (TYPE_MODE (ret_type))
51312 case QImode:
51313 case HImode:
51314 case SImode:
51315 case DImode:
51316 case SFmode:
51317 case DFmode:
51318 /* case SCmode: */
51319 /* case DCmode: */
51320 break;
51321 default:
51322 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51323 "unsupported return type %qT for simd\n", ret_type);
51324 return 0;
51327 tree t;
51328 int i;
51330 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51331 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51332 switch (TYPE_MODE (TREE_TYPE (t)))
51334 case QImode:
51335 case HImode:
51336 case SImode:
51337 case DImode:
51338 case SFmode:
51339 case DFmode:
51340 /* case SCmode: */
51341 /* case DCmode: */
51342 break;
51343 default:
51344 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51345 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
51346 return 0;
51349 if (clonei->cilk_elemental)
51351 /* Parse the processor clause here. If not present, default to 'b'. */
51352 clonei->vecsize_mangle = 'b';
51354 else if (!TREE_PUBLIC (node->decl))
51356 /* If the function isn't exported, we can pick up just one ISA
51357 for the clones. */
51358 if (TARGET_AVX2)
51359 clonei->vecsize_mangle = 'd';
51360 else if (TARGET_AVX)
51361 clonei->vecsize_mangle = 'c';
51362 else
51363 clonei->vecsize_mangle = 'b';
51364 ret = 1;
51366 else
51368 clonei->vecsize_mangle = "bcd"[num];
51369 ret = 3;
51371 switch (clonei->vecsize_mangle)
51373 case 'b':
51374 clonei->vecsize_int = 128;
51375 clonei->vecsize_float = 128;
51376 break;
51377 case 'c':
51378 clonei->vecsize_int = 128;
51379 clonei->vecsize_float = 256;
51380 break;
51381 case 'd':
51382 clonei->vecsize_int = 256;
51383 clonei->vecsize_float = 256;
51384 break;
51386 if (clonei->simdlen == 0)
51388 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51389 clonei->simdlen = clonei->vecsize_int;
51390 else
51391 clonei->simdlen = clonei->vecsize_float;
51392 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51393 if (clonei->simdlen > 16)
51394 clonei->simdlen = 16;
51396 return ret;
51399 /* Add target attribute to SIMD clone NODE if needed. */
51401 static void
51402 ix86_simd_clone_adjust (struct cgraph_node *node)
51404 const char *str = NULL;
51405 gcc_assert (node->decl == cfun->decl);
51406 switch (node->simdclone->vecsize_mangle)
51408 case 'b':
51409 if (!TARGET_SSE2)
51410 str = "sse2";
51411 break;
51412 case 'c':
51413 if (!TARGET_AVX)
51414 str = "avx";
51415 break;
51416 case 'd':
51417 if (!TARGET_AVX2)
51418 str = "avx2";
51419 break;
51420 default:
51421 gcc_unreachable ();
51423 if (str == NULL)
51424 return;
51425 push_cfun (NULL);
51426 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51427 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51428 gcc_assert (ok);
51429 pop_cfun ();
51430 ix86_reset_previous_fndecl ();
51431 ix86_set_current_function (node->decl);
51434 /* If SIMD clone NODE can't be used in a vectorized loop
51435 in the current function, return -1, otherwise return the badness of using it
51436 (0 if it is most desirable from vecsize_mangle point of view, 1
51437 slightly less desirable, etc.). */
51439 static int
51440 ix86_simd_clone_usable (struct cgraph_node *node)
51442 switch (node->simdclone->vecsize_mangle)
51444 case 'b':
51445 if (!TARGET_SSE2)
51446 return -1;
51447 if (!TARGET_AVX)
51448 return 0;
51449 return TARGET_AVX2 ? 2 : 1;
51450 case 'c':
51451 if (!TARGET_AVX)
51452 return -1;
51453 return TARGET_AVX2 ? 1 : 0;
51454 break;
51455 case 'd':
51456 if (!TARGET_AVX2)
51457 return -1;
51458 return 0;
51459 default:
51460 gcc_unreachable ();
51464 /* This function adjusts the unroll factor based on
51465 the hardware capabilities. For example, bdver3 has
51466 a loop buffer which makes unrolling of smaller
51467 loops less important. This function decides the
51468 unroll factor using the number of memory references
51469 (the value 32 is used) as a heuristic. */
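/* For instance, a loop body with 8 counted memory references yields an
   unroll factor of 32 / 8 == 4 in the computation below. */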
51471 static unsigned
51472 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51474 basic_block *bbs;
51475 rtx_insn *insn;
51476 unsigned i;
51477 unsigned mem_count = 0;
51479 if (!TARGET_ADJUST_UNROLL)
51480 return nunroll;
51482 /* Count the number of memory references within the loop body.
51483 This value determines the unrolling factor for bdver3 and bdver4
51484 architectures. */
51485 subrtx_iterator::array_type array;
51486 bbs = get_loop_body (loop);
51487 for (i = 0; i < loop->num_nodes; i++)
51488 FOR_BB_INSNS (bbs[i], insn)
51489 if (NONDEBUG_INSN_P (insn))
51490 FOR_EACH_SUBRTX (iter, array, insn, NONCONST)
51491 if (const_rtx x = *iter)
51492 if (MEM_P (x))
51494 machine_mode mode = GET_MODE (x);
51495 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
51496 if (n_words > 4)
51497 mem_count += 2;
51498 else
51499 mem_count += 1;
51501 free (bbs);
51503 if (mem_count && mem_count <= 32)
51504 return 32 / mem_count;
51506 return nunroll;
51510 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51512 static bool
51513 ix86_float_exceptions_rounding_supported_p (void)
51515 /* For x87 floating point with standard excess precision handling,
51516 there is no adddf3 pattern (since x87 floating point only has
51517 XFmode operations) so the default hook implementation gets this
51518 wrong. */
51519 return TARGET_80387 || TARGET_SSE_MATH;
51522 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
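/* Roughly: *HOLD receives code equivalent to feholdexcept plus
   feclearexcept (save the FP environment and clear exceptions), *CLEAR
   receives code equivalent to feclearexcept, and *UPDATE receives code
   equivalent to feupdateenv (restore the environment and re-raise any
   exceptions that occurred). */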
51524 static void
51525 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
51527 if (!TARGET_80387 && !TARGET_SSE_MATH)
51528 return;
51529 tree exceptions_var = create_tmp_var (integer_type_node);
51530 if (TARGET_80387)
51532 tree fenv_index_type = build_index_type (size_int (6));
51533 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51534 tree fenv_var = create_tmp_var (fenv_type);
51535 mark_addressable (fenv_var);
51536 tree fenv_ptr = build_pointer_type (fenv_type);
51537 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51538 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51539 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51540 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51541 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51542 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51543 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51544 tree hold_fnclex = build_call_expr (fnclex, 0);
51545 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51546 hold_fnclex);
51547 *clear = build_call_expr (fnclex, 0);
51548 tree sw_var = create_tmp_var (short_unsigned_type_node);
51549 tree fnstsw_call = build_call_expr (fnstsw, 0);
51550 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51551 sw_var, fnstsw_call);
51552 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51553 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51554 exceptions_var, exceptions_x87);
51555 *update = build2 (COMPOUND_EXPR, integer_type_node,
51556 sw_mod, update_mod);
51557 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51558 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
51560 if (TARGET_SSE_MATH)
51562 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
51563 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
51564 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
51565 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
51566 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
51567 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
51568 mxcsr_orig_var, stmxcsr_hold_call);
51569 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
51570 mxcsr_orig_var,
51571 build_int_cst (unsigned_type_node, 0x1f80));
51572 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
51573 build_int_cst (unsigned_type_node, 0xffffffc0));
51574 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
51575 mxcsr_mod_var, hold_mod_val);
51576 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51577 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
51578 hold_assign_orig, hold_assign_mod);
51579 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
51580 ldmxcsr_hold_call);
51581 if (*hold)
51582 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
51583 else
51584 *hold = hold_all;
51585 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51586 if (*clear)
51587 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
51588 ldmxcsr_clear_call);
51589 else
51590 *clear = ldmxcsr_clear_call;
51591 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
51592 tree exceptions_sse = fold_convert (integer_type_node,
51593 stxmcsr_update_call);
51594 if (*update)
51596 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
51597 exceptions_var, exceptions_sse);
51598 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
51599 exceptions_var, exceptions_mod);
51600 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
51601 exceptions_assign);
51603 else
51604 *update = build2 (MODIFY_EXPR, integer_type_node,
51605 exceptions_var, exceptions_sse);
51606 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
51607 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51608 ldmxcsr_update_call);
51610 tree atomic_feraiseexcept
51611 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
51612 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
51613 1, exceptions_var);
51614 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51615 atomic_feraiseexcept_call);
51618 /* Return mode to be used for bounds or VOIDmode
51619 if bounds are not supported. */
51621 static enum machine_mode
51622 ix86_mpx_bound_mode ()
51624 /* Do not support pointer checker if MPX
51625 is not enabled. */
51626 if (!TARGET_MPX)
51628 if (flag_check_pointer_bounds)
51629 warning (0, "Pointer Checker requires MPX support on this target."
51630 " Use -mmpx options to enable MPX.");
51631 return VOIDmode;
51634 return BNDmode;
51637 /* Return constant used to statically initialize constant bounds.
51639 This function is used to create special bound values. For now
51640 only INIT bounds and NONE bounds are expected. More special
51641 values may be added later. */
51643 static tree
51644 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
51646 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
51647 : build_zero_cst (pointer_sized_int_node);
51648 tree high = ub ? build_zero_cst (pointer_sized_int_node)
51649 : build_minus_one_cst (pointer_sized_int_node);
51651 /* This function is supposed to be used to create INIT and
51652 NONE bounds only. */
51653 gcc_assert ((lb == 0 && ub == -1)
51654 || (lb == -1 && ub == 0));
51656 return build_complex (NULL, low, high);
51659 /* Generate a list of statements STMTS to initialize pointer bounds
51660 variable VAR with bounds LB and UB. Return the number of generated
51661 statements. */
51663 static int
51664 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
51666 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
51667 tree lhs, modify, var_p;
51669 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
51670 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
51672 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
51673 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
51674 append_to_statement_list (modify, stmts);
51676 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
51677 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
51678 TYPE_SIZE_UNIT (pointer_sized_int_node)));
51679 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
51680 append_to_statement_list (modify, stmts);
51682 return 2;
51685 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
51686 /* For i386, a common symbol is local only for non-PIE binaries. For
51687 x86-64, a common symbol is local only for non-PIE binaries or if the
51688 linker supports copy relocs in PIE binaries. */
51690 static bool
51691 ix86_binds_local_p (const_tree exp)
51693 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
51694 (!flag_pic
51695 || (TARGET_64BIT
51696 && HAVE_LD_PIE_COPYRELOC != 0)));
51698 #endif
51700 /* If MEM is in the form of [base+offset], extract the two parts
51701 of the address into BASE and OFFSET, otherwise return false. */
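/* For example, (plus (reg) (const_int 12)) yields *BASE = (reg) and
   *OFFSET = (const_int 12); a bare REG or SYMBOL_REF yields an offset
   of zero. */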
51703 static bool
51704 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
51706 rtx addr;
51708 gcc_assert (MEM_P (mem));
51710 addr = XEXP (mem, 0);
51712 if (GET_CODE (addr) == CONST)
51713 addr = XEXP (addr, 0);
51715 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
51717 *base = addr;
51718 *offset = const0_rtx;
51719 return true;
51722 if (GET_CODE (addr) == PLUS
51723 && (REG_P (XEXP (addr, 0))
51724 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
51725 && CONST_INT_P (XEXP (addr, 1)))
51727 *base = XEXP (addr, 0);
51728 *offset = XEXP (addr, 1);
51729 return true;
51732 return false;
51735 /* Given OPERANDS of consecutive load/store instructions, check if we can
51736 merge them into a move-multiple. LOAD is true if they are load instructions.
51737 MODE is the mode of the memory operands. */
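/* For example, with DImode operands the check below accepts two accesses at
   BASE+0 and BASE+8, where MEM_1 is the one at the lower address and both
   use the same register. */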
51739 bool
51740 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
51741 enum machine_mode mode)
51743 HOST_WIDE_INT offval_1, offval_2, msize;
51744 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
51746 if (load)
51748 mem_1 = operands[1];
51749 mem_2 = operands[3];
51750 reg_1 = operands[0];
51751 reg_2 = operands[2];
51753 else
51755 mem_1 = operands[0];
51756 mem_2 = operands[2];
51757 reg_1 = operands[1];
51758 reg_2 = operands[3];
51761 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
51763 if (REGNO (reg_1) != REGNO (reg_2))
51764 return false;
51766 /* Check if the addresses are in the form of [base+offset]. */
51767 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
51768 return false;
51769 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
51770 return false;
51772 /* Check if the bases are the same. */
51773 if (!rtx_equal_p (base_1, base_2))
51774 return false;
51776 offval_1 = INTVAL (offset_1);
51777 offval_2 = INTVAL (offset_2);
51778 msize = GET_MODE_SIZE (mode);
51779 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
51780 if (offval_1 + msize != offval_2)
51781 return false;
51783 return true;
51786 /* Initialize the GCC target structure. */
51787 #undef TARGET_RETURN_IN_MEMORY
51788 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51790 #undef TARGET_LEGITIMIZE_ADDRESS
51791 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51793 #undef TARGET_ATTRIBUTE_TABLE
51794 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51795 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51796 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51797 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51798 # undef TARGET_MERGE_DECL_ATTRIBUTES
51799 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51800 #endif
51802 #undef TARGET_COMP_TYPE_ATTRIBUTES
51803 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51805 #undef TARGET_INIT_BUILTINS
51806 #define TARGET_INIT_BUILTINS ix86_init_builtins
51807 #undef TARGET_BUILTIN_DECL
51808 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51809 #undef TARGET_EXPAND_BUILTIN
51810 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51812 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51813 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51814 ix86_builtin_vectorized_function
51816 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
51817 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
51819 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
51820 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
51822 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51823 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51825 #undef TARGET_BUILTIN_RECIPROCAL
51826 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
51828 #undef TARGET_ASM_FUNCTION_EPILOGUE
51829 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
51831 #undef TARGET_ENCODE_SECTION_INFO
51832 #ifndef SUBTARGET_ENCODE_SECTION_INFO
51833 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
51834 #else
51835 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
51836 #endif
51838 #undef TARGET_ASM_OPEN_PAREN
51839 #define TARGET_ASM_OPEN_PAREN ""
51840 #undef TARGET_ASM_CLOSE_PAREN
51841 #define TARGET_ASM_CLOSE_PAREN ""
51843 #undef TARGET_ASM_BYTE_OP
51844 #define TARGET_ASM_BYTE_OP ASM_BYTE
51846 #undef TARGET_ASM_ALIGNED_HI_OP
51847 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
51848 #undef TARGET_ASM_ALIGNED_SI_OP
51849 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
51850 #ifdef ASM_QUAD
51851 #undef TARGET_ASM_ALIGNED_DI_OP
51852 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
51853 #endif
51855 #undef TARGET_PROFILE_BEFORE_PROLOGUE
51856 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
51858 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
51859 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
51861 #undef TARGET_ASM_UNALIGNED_HI_OP
51862 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
51863 #undef TARGET_ASM_UNALIGNED_SI_OP
51864 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
51865 #undef TARGET_ASM_UNALIGNED_DI_OP
51866 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
51868 #undef TARGET_PRINT_OPERAND
51869 #define TARGET_PRINT_OPERAND ix86_print_operand
51870 #undef TARGET_PRINT_OPERAND_ADDRESS
51871 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
51872 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
51873 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
51874 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
51875 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
51877 #undef TARGET_SCHED_INIT_GLOBAL
51878 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
51879 #undef TARGET_SCHED_ADJUST_COST
51880 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
51881 #undef TARGET_SCHED_ISSUE_RATE
51882 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
51883 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
51884 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
51885 ia32_multipass_dfa_lookahead
51886 #undef TARGET_SCHED_MACRO_FUSION_P
51887 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
51888 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
51889 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
51891 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
51892 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
51894 #undef TARGET_MEMMODEL_CHECK
51895 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
51897 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
51898 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
51900 #ifdef HAVE_AS_TLS
51901 #undef TARGET_HAVE_TLS
51902 #define TARGET_HAVE_TLS true
51903 #endif
51904 #undef TARGET_CANNOT_FORCE_CONST_MEM
51905 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
51906 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
51907 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
51909 #undef TARGET_DELEGITIMIZE_ADDRESS
51910 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
51912 #undef TARGET_MS_BITFIELD_LAYOUT_P
51913 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
51915 #if TARGET_MACHO
51916 #undef TARGET_BINDS_LOCAL_P
51917 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
51918 #else
51919 #undef TARGET_BINDS_LOCAL_P
51920 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
51921 #endif
51922 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51923 #undef TARGET_BINDS_LOCAL_P
51924 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
51925 #endif
51927 #undef TARGET_ASM_OUTPUT_MI_THUNK
51928 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
51929 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
51930 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
51932 #undef TARGET_ASM_FILE_START
51933 #define TARGET_ASM_FILE_START x86_file_start
51935 #undef TARGET_OPTION_OVERRIDE
51936 #define TARGET_OPTION_OVERRIDE ix86_option_override
51938 #undef TARGET_REGISTER_MOVE_COST
51939 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
51940 #undef TARGET_MEMORY_MOVE_COST
51941 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
51942 #undef TARGET_RTX_COSTS
51943 #define TARGET_RTX_COSTS ix86_rtx_costs
51944 #undef TARGET_ADDRESS_COST
51945 #define TARGET_ADDRESS_COST ix86_address_cost
51947 #undef TARGET_FIXED_CONDITION_CODE_REGS
51948 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
51949 #undef TARGET_CC_MODES_COMPATIBLE
51950 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
51952 #undef TARGET_MACHINE_DEPENDENT_REORG
51953 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
51955 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
51956 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
51958 #undef TARGET_BUILD_BUILTIN_VA_LIST
51959 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
51961 #undef TARGET_FOLD_BUILTIN
51962 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
51964 #undef TARGET_COMPARE_VERSION_PRIORITY
51965 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
51967 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
51968 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
51969 ix86_generate_version_dispatcher_body
51971 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
51972 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
51973 ix86_get_function_versions_dispatcher
51975 #undef TARGET_ENUM_VA_LIST_P
51976 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
51978 #undef TARGET_FN_ABI_VA_LIST
51979 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
51981 #undef TARGET_CANONICAL_VA_LIST_TYPE
51982 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
51984 #undef TARGET_EXPAND_BUILTIN_VA_START
51985 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
51987 #undef TARGET_MD_ASM_CLOBBERS
51988 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
51990 #undef TARGET_PROMOTE_PROTOTYPES
51991 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
51992 #undef TARGET_SETUP_INCOMING_VARARGS
51993 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
51994 #undef TARGET_MUST_PASS_IN_STACK
51995 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
51996 #undef TARGET_FUNCTION_ARG_ADVANCE
51997 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
51998 #undef TARGET_FUNCTION_ARG
51999 #define TARGET_FUNCTION_ARG ix86_function_arg
52000 #undef TARGET_INIT_PIC_REG
52001 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
52002 #undef TARGET_USE_PSEUDO_PIC_REG
52003 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
52004 #undef TARGET_FUNCTION_ARG_BOUNDARY
52005 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
52006 #undef TARGET_PASS_BY_REFERENCE
52007 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  ix86_libgcc_floating_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
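/* Auto-vectorizer hooks: cost model, constant permutation support and
   preferred SIMD modes and vector sizes.  */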
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
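/* Per-function target option support (attribute ((target ("...")))):
   validating, saving, restoring and printing option state, and deciding
   whether calls between differently-targeted functions may be inlined.  */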
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_POST_STREAM_IN
#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
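/* Register allocation: use LRA rather than the old reload pass.  */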
#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif
#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class
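/* SIMD clones, as created for OpenMP 'declare simd' functions.  */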
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p
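/* Mode-switching pass hooks; on x86 these track the AVX upper-128-bit
   state (for vzeroupper insertion) and the x87 control word.  */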
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
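/* Pointer Bounds Checker (Intel MPX) hooks for passing, returning and
   initializing pointer bounds.  */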
#undef TARGET_LOAD_BOUNDS_FOR_ARG
#define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds

#undef TARGET_STORE_BOUNDS_FOR_ARG
#define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds

#undef TARGET_LOAD_RETURNED_BOUNDS
#define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds

#undef TARGET_STORE_RETURNED_BOUNDS
#define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds

#undef TARGET_CHKP_BOUND_MODE
#define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode

#undef TARGET_BUILTIN_CHKP_FUNCTION
#define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function

#undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
#define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds

#undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
#define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant

#undef TARGET_CHKP_INITIALIZE_BOUNDS
#define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds

#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options
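/* 512 bits (64 bytes), matching the widest (ZMM) vector registers.  */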
#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
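/* Initialize the target hook vector.  TARGET_INITIALIZER, defined in
   target-def.h, expands to an initializer built from the TARGET_* macros
   redefined above; hooks left untouched keep their default values.  */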
struct gcc_target targetm = TARGET_INITIALIZER;
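/* gt-i386.h is generated by gengtype and contains the garbage-collector
   root tables for this file.  */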
#include "gt-i386.h"